diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp --- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -63,6 +63,7 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -81,6 +82,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "tailcallelim" @@ -92,7 +94,10 @@ /// Scan the specified function for alloca instructions. /// If it contains any dynamic allocas, returns false. static bool canTRE(Function &F) { - // Because of PR962, we don't TRE dynamic allocas. + // TODO: We don't do TRE if dynamic allocas are used. + // Dynamic allocas allocate stack space which should be + // deallocated before new iteration started. That is + // currently not implemented. return llvm::all_of(instructions(F), [](Instruction &I) { auto *AI = dyn_cast(&I); return !AI || AI->isStaticAlloca(); @@ -185,11 +190,9 @@ }; } -static bool markTails(Function &F, bool &AllCallsAreTailCalls, - OptimizationRemarkEmitter *ORE) { +static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) { if (F.callsFunctionThatReturnsTwice()) return false; - AllCallsAreTailCalls = true; // The local stack holds all alloca instructions and all byval arguments. 
AllocaDerivedValueTracker Tracker; @@ -272,11 +275,8 @@ } } - if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) { + if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) DeferredTails.push_back(CI); - } else { - AllCallsAreTailCalls = false; - } } for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) { @@ -313,8 +313,6 @@ LLVM_DEBUG(dbgs() << "Marked as tail call candidate: " << *CI << "\n"); CI->setTailCall(); Modified = true; - } else { - AllCallsAreTailCalls = false; } } @@ -326,6 +324,9 @@ /// instructions between the call and this instruction are movable. /// static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) { + if (isa(I)) + return true; + // FIXME: We can move load/store/call/free instructions above the call if the // call does not mod/ref the memory location being processed. if (I->mayHaveSideEffects()) // This also handles volatile loads. @@ -392,7 +393,6 @@ // createTailRecurseLoopHeader the first time we find a call we can eliminate. BasicBlock *HeaderBB = nullptr; SmallVector ArgumentPHIs; - bool RemovableCallsMustBeMarkedTail = false; // PHI node to store our return value. 
PHINode *RetPN = nullptr; @@ -419,8 +419,7 @@ DomTreeUpdater &DTU) : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU) {} - CallInst *findTRECandidate(Instruction *TI, - bool CannotTailCallElimCallsMarkedTail); + CallInst *findTRECandidate(Instruction *TI); void createTailRecurseLoopHeader(CallInst *CI); @@ -428,11 +427,9 @@ bool eliminateCall(CallInst *CI); - bool foldReturnAndProcessPred(ReturnInst *Ret, - bool CannotTailCallElimCallsMarkedTail); + bool foldReturnAndProcessPred(ReturnInst *Ret); - bool processReturningBlock(ReturnInst *Ret, - bool CannotTailCallElimCallsMarkedTail); + bool processReturningBlock(ReturnInst *Ret); void cleanupAndFinalize(); @@ -443,8 +440,7 @@ }; } // namespace -CallInst *TailRecursionEliminator::findTRECandidate( - Instruction *TI, bool CannotTailCallElimCallsMarkedTail) { +CallInst *TailRecursionEliminator::findTRECandidate(Instruction *TI) { BasicBlock *BB = TI->getParent(); if (&BB->front() == TI) // Make sure there is something before the terminator. @@ -464,9 +460,9 @@ --BBI; } - // If this call is marked as a tail call, and if there are dynamic allocas in - // the function, we cannot perform this optimization. - if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail) + assert((!CI->isTailCall() || !CI->isNoTailCall()) && + "Incompatible call site attributes(Tail,NoTail)"); + if (!CI->isTailCall()) return nullptr; // As a special case, detect code like this: @@ -498,26 +494,13 @@ BranchInst *BI = BranchInst::Create(HeaderBB, NewEntry); BI->setDebugLoc(CI->getDebugLoc()); - // If this function has self recursive calls in the tail position where some - // are marked tail and some are not, only transform one flavor or another. - // We have to choose whether we move allocas in the entry block to the new - // entry block or not, so we can't make a good choice for both. We make this - // decision here based on whether the first call we found to remove is - // marked tail. 
- // NOTE: We could do slightly better here in the case that the function has - // no entry block allocas. - RemovableCallsMustBeMarkedTail = CI->isTailCall(); - - // If this tail call is marked 'tail' and if there are any allocas in the - // entry block, move them up to the new entry block. - if (RemovableCallsMustBeMarkedTail) - // Move all fixed sized allocas from HeaderBB to NewEntry. - for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(), - NEBI = NewEntry->begin(); - OEBI != E;) - if (AllocaInst *AI = dyn_cast(OEBI++)) - if (isa(AI->getArraySize())) - AI->moveBefore(&*NEBI); + // Move all fixed sized allocas from HeaderBB to NewEntry. + for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(), + NEBI = NewEntry->begin(); + OEBI != E;) + if (AllocaInst *AI = dyn_cast(OEBI++)) + if (isa(AI->getArraySize())) + AI->moveBefore(&*NEBI); // Now that we have created a new block, which jumps to the entry // block, insert a PHI node for each argument of the function. @@ -582,8 +565,42 @@ ++NumAccumAdded; } +// This function checks whether a lifetime marker can be skipped +// or deleted. When the lifetime marker points to a local stack +// variable used for a byvalue parameter of the recursive function, +// TRE can be done and the lifetime marker should be removed. +// When the lifetime marker points to a local stack variable not used +// as a recursive function parameter, TRE can be done and the lifetime +// marker should be left in place. In all other cases TRE cannot +// be done.
+static bool +canSkipLifetimeMarker(Instruction *I, CallInst *CI, + SmallVector &InstrsToRemove) { + if (const IntrinsicInst *II = dyn_cast(I)) { + if (II->getIntrinsicID() == Intrinsic::lifetime_end) { + if (AllocaInst *AI = llvm::findAllocaForValue(II->getArgOperand(1))) { + // If an alloca is used as an input parameter of the recursive + // function then we need to remove lifetime.end marker as this + // alloca could be used later, otherwise, leave lifetime.end + // marker in place. + for (Value *Op : CI->args()) { + if (AI == llvm::findAllocaForValue(Op)) { + InstrsToRemove.push_back(I); // Remove lifetime.end marker. + break; + } + } + + return true; + } + } + } + + return false; +} + bool TailRecursionEliminator::eliminateCall(CallInst *CI) { ReturnInst *Ret = cast(CI->getParent()->getTerminator()); + SmallVector InstrsToRemove; // Ok, we found a potential tail call. We can currently only transform the // tail call if all of the instructions between the call and the return are @@ -595,6 +612,9 @@ if (canMoveAboveCall(&*BBI, CI, AA)) continue; + if (canSkipLifetimeMarker(&*BBI, CI, InstrsToRemove)) + continue; + // If we can't move the instruction above the call, it might be because it // is an associative and commutative operation that could be transformed // using accumulator recursion elimination. Check to see if this is the @@ -620,9 +640,6 @@ if (!HeaderBB) createTailRecurseLoopHeader(CI); - if (RemovableCallsMustBeMarkedTail && !CI->isTailCall()) - return false; - // Ok, now that we know we have a pseudo-entry block WITH all of the // required PHI nodes, add entries into the PHI node for the actual // parameters passed into the tail-recursive call. @@ -665,15 +682,18 @@ BranchInst *NewBI = BranchInst::Create(HeaderBB, Ret); NewBI->setDebugLoc(CI->getDebugLoc()); - BB->getInstList().erase(Ret); // Remove return. - BB->getInstList().erase(CI); // Remove call. + InstrsToRemove.push_back(Ret); // Remove return. + InstrsToRemove.push_back(CI); // Remove call. 
+ + for (Instruction *Instr : InstrsToRemove) + BB->getInstList().erase(Instr); + DTU.applyUpdates({{DominatorTree::Insert, BB, HeaderBB}}); ++NumEliminated; return true; } -bool TailRecursionEliminator::foldReturnAndProcessPred( - ReturnInst *Ret, bool CannotTailCallElimCallsMarkedTail) { +bool TailRecursionEliminator::foldReturnAndProcessPred(ReturnInst *Ret) { BasicBlock *BB = Ret->getParent(); bool Change = false; @@ -698,8 +718,7 @@ while (!UncondBranchPreds.empty()) { BranchInst *BI = UncondBranchPreds.pop_back_val(); BasicBlock *Pred = BI->getParent(); - if (CallInst *CI = - findTRECandidate(BI, CannotTailCallElimCallsMarkedTail)) { + if (CallInst *CI = findTRECandidate(BI)) { LLVM_DEBUG(dbgs() << "FOLDING: " << *BB << "INTO UNCOND BRANCH PRED: " << *Pred); FoldReturnIntoUncondBranch(Ret, BB, Pred, &DTU); @@ -720,9 +739,8 @@ return Change; } -bool TailRecursionEliminator::processReturningBlock( - ReturnInst *Ret, bool CannotTailCallElimCallsMarkedTail) { - CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail); +bool TailRecursionEliminator::processReturningBlock(ReturnInst *Ret) { + CallInst *CI = findTRECandidate(Ret); if (!CI) return false; @@ -810,35 +828,25 @@ return false; bool MadeChange = false; - bool AllCallsAreTailCalls = false; - MadeChange |= markTails(F, AllCallsAreTailCalls, ORE); - if (!AllCallsAreTailCalls) - return MadeChange; + MadeChange |= markTails(F, ORE); // If this function is a varargs function, we won't be able to PHI the args // right, so don't even try to convert it... if (F.getFunctionType()->isVarArg()) return MadeChange; - // If false, we cannot perform TRE on tail calls marked with the 'tail' - // attribute, because doing so would cause the stack size to increase (real - // TRE would deallocate variable sized allocas, TRE doesn't). - bool CanTRETailMarkedCall = canTRE(F); + if (!canTRE(F)) + return MadeChange; TailRecursionEliminator TRE(F, TTI, AA, ORE, DTU); // Change any tail recursive calls to loops. 
- // - // FIXME: The code generator produces really bad code when an 'escaping - // alloca' is changed from being a static alloca to being a dynamic alloca. - // Until this is resolved, disable this transformation if that would ever - // happen. This bug is PR962. for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) { BasicBlock *BB = &*BBI++; // foldReturnAndProcessPred may delete BB. if (ReturnInst *Ret = dyn_cast(BB->getTerminator())) { - bool Change = TRE.processReturningBlock(Ret, !CanTRETailMarkedCall); + bool Change = TRE.processReturningBlock(Ret); if (!Change && BB->getFirstNonPHIOrDbg() == Ret) - Change = TRE.foldReturnAndProcessPred(Ret, !CanTRETailMarkedCall); + Change = TRE.foldReturnAndProcessPred(Ret); MadeChange |= Change; } } diff --git a/llvm/test/Transforms/TailCallElim/basic.ll b/llvm/test/Transforms/TailCallElim/basic.ll --- a/llvm/test/Transforms/TailCallElim/basic.ll +++ b/llvm/test/Transforms/TailCallElim/basic.ll @@ -12,15 +12,16 @@ ret void } -; PR615. Make sure that we do not move the alloca so that it interferes with the tail call. +; Make sure that we do not do TRE if pointer to local stack +; escapes through function call. 
define i32 @test1() { ; CHECK: i32 @test1() ; CHECK-NEXT: alloca %A = alloca i32 ; [#uses=2] store i32 5, i32* %A call void @use(i32* %A) -; CHECK: tail call i32 @test1 - %X = tail call i32 @test1() ; [#uses=1] +; CHECK: call i32 @test1 + %X = call i32 @test1() ; [#uses=1] ret i32 %X } diff --git a/llvm/test/Transforms/TailCallElim/tre-byval-parameter.ll b/llvm/test/Transforms/TailCallElim/tre-byval-parameter.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/TailCallElim/tre-byval-parameter.ll @@ -0,0 +1,113 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s + +; the test was generated from the following C++ source: +; +; int zoo ( S p1 ); +; +; int foo ( int count, S p1 ) { +; if ( count > 10 ) +; return zoo(p1); +; +; // After TRE: temporarily variable created for passing byvalue parameter +; // p1 could be used when zoo(p1) is called. +; return foo(count+1, p1); +; } + +; this test checks that lifetime.end marker is removed during TRE +; for byval parameter "agg.tmp.i". Specifically, when "call i32 @_Z3fooi1S" +; is replaced with "br label tailrecurse" the lifetime marker +; "@llvm.lifetime.end.p0i8(i64 20, i8* nonnull %4)", corresponding to the +; alloca for byval parameter, should be removed. Other lifetime markers +; (not related to parameters of recursive function) should be left in its +; original place. 
+ +%struct.S = type { i32, i32, float, %struct.B } +%struct.B = type { i32, float } + +; Function Attrs: uwtable +define dso_local i32 @_Z3fooi1S(i32 %count, %struct.S* nocapture readonly byval(%struct.S) align 8 %p1) local_unnamed_addr #0 { +; CHECK-LABEL: @_Z3fooi1S( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AGG_TMP_I:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +; CHECK-NEXT: [[AGG_TMP14:%.*]] = alloca [[STRUCT_S]], align 8 +; CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_S]], align 8 +; CHECK-NEXT: [[AGG_TMP1:%.*]] = alloca [[STRUCT_S]], align 8 +; CHECK-NEXT: br label [[TAILRECURSE:%.*]] +; CHECK: tailrecurse: +; CHECK-NEXT: [[COUNT_TR:%.*]] = phi i32 [ [[COUNT:%.*]], [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[IF_END:%.*]] ] +; CHECK-NEXT: [[P1_TR:%.*]] = phi %struct.S* [ [[P1:%.*]], [[ENTRY]] ], [ [[AGG_TMP_I]], [[IF_END]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[COUNT_TR]], 10 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast %struct.S* [[AGG_TMP]] to i8* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %struct.S* [[P1_TR]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(20) [[TMP0]], i8* nonnull align 8 dereferenceable(20) [[TMP1]], i64 20, i1 false) +; CHECK-NEXT: [[CALL:%.*]] = tail call i32 @_Z3zoo1S(%struct.S* nonnull byval(%struct.S) align 8 [[AGG_TMP]]) +; CHECK-NEXT: br label [[RETURN:%.*]] +; CHECK: if.end: +; CHECK-NEXT: [[ADD]] = add nsw i32 [[COUNT_TR]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %struct.S* [[AGG_TMP1]] to i8* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast %struct.S* [[P1_TR]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(20) [[TMP2]], i8* nonnull align 8 dereferenceable(20) [[TMP3]], i64 20, i1 false) +; CHECK-NEXT: [[AGG_TMP14_0__SROA_CAST:%.*]] = bitcast %struct.S* [[AGG_TMP14]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 20, i8* nonnull [[AGG_TMP14_0__SROA_CAST]]) +; 
CHECK-NEXT: [[TMP4:%.*]] = bitcast %struct.S* [[AGG_TMP_I]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 20, i8* nonnull [[TMP4]]) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(20) [[AGG_TMP14_0__SROA_CAST]], i8* nonnull align 8 dereferenceable(20) [[TMP2]], i64 20, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(20) [[TMP4]], i8* nonnull align 8 dereferenceable(20) [[AGG_TMP14_0__SROA_CAST]], i64 20, i1 false) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 20, i8* nonnull [[AGG_TMP14_0__SROA_CAST]]) +; CHECK-NEXT: br label [[TAILRECURSE]] +; CHECK: return: +; CHECK-NEXT: ret i32 [[CALL]] +; +entry: + %agg.tmp.i = alloca %struct.S, align 8 + %agg.tmp14 = alloca %struct.S, align 8 + %agg.tmp = alloca %struct.S, align 8 + %agg.tmp1 = alloca %struct.S, align 8 + %cmp = icmp sgt i32 %count, 10 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %0 = bitcast %struct.S* %agg.tmp to i8* + %1 = bitcast %struct.S* %p1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(20) %0, i8* nonnull align 8 dereferenceable(20) %1, i64 20, i1 false) + %call = call i32 @_Z3zoo1S(%struct.S* nonnull byval(%struct.S) align 8 %agg.tmp) + br label %return + +if.end: ; preds = %entry + %add = add nsw i32 %count, 1 + %2 = bitcast %struct.S* %agg.tmp1 to i8* + %3 = bitcast %struct.S* %p1 to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(20) %2, i8* nonnull align 8 dereferenceable(20) %3, i64 20, i1 false) + %agg.tmp14.0..sroa_cast = bitcast %struct.S* %agg.tmp14 to i8* + call void @llvm.lifetime.start.p0i8(i64 20, i8* nonnull %agg.tmp14.0..sroa_cast) + %4 = bitcast %struct.S* %agg.tmp.i to i8* + call void @llvm.lifetime.start.p0i8(i64 20, i8* nonnull %4) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(20) %agg.tmp14.0..sroa_cast, i8* nonnull align 8 dereferenceable(20) 
%2, i64 20, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 dereferenceable(20) %4, i8* nonnull align 8 dereferenceable(20) %agg.tmp14.0..sroa_cast, i64 20, i1 false) + %call.i = call i32 @_Z3fooi1S(i32 %add, %struct.S* nonnull byval(%struct.S) align 8 %agg.tmp.i) + call void @llvm.lifetime.end.p0i8(i64 20, i8* nonnull %agg.tmp14.0..sroa_cast) + call void @llvm.lifetime.end.p0i8(i64 20, i8* nonnull %4) + br label %return + +return: ; preds = %if.end, %if.then + %retval.0 = phi i32 [ %call, %if.then ], [ %call.i, %if.end ] + ret i32 %retval.0 +} + +declare dso_local i32 @_Z3zoo1S(%struct.S* byval(%struct.S) align 8) local_unnamed_addr #1 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #2 + +attributes #0 = { uwtable } +attributes #1 = { uwtable } +attributes #2 = { argmemonly nounwind willreturn } diff --git a/llvm/test/Transforms/TailCallElim/tre-multiple-exits.ll b/llvm/test/Transforms/TailCallElim/tre-multiple-exits.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/TailCallElim/tre-multiple-exits.ll @@ -0,0 +1,125 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s + +; This test checks that TRE would be done for only one recursive call. +; The test_multiple_exits function has three recursive calls. +; First recursive call could not be eliminated because there is +; escaped pointer to local variable. Second recursive call could +; be eliminated. Third recursive call could not be eliminated since +; it is not the last call.
Thus, test checks that TRE would be done +; for only second recursive call. + +; IR for that test was generated from the following C++ source: +; +; void capture_arg (int*); +; void test_multiple_exits (int param) { +; if (param >= 0 && param < 10) { +; int temp; +; capture_arg(&temp); +; // TRE could not be done because pointer to local +; // variable "temp" is escaped. +; test_multiple_exits(param + 1); +; } else if (param >= 10 && param < 20) { +; // TRE should be done. +; test_multiple_exits(param + 1); +; } else if (param >= 20 && param < 22) { +; // TRE could not be done since recursive +; // call is not last call. +; test_multiple_exits(param + 1); +; func(); +; } +; +; return; +; } + +; Function Attrs: noinline optnone uwtable +declare void @_Z11capture_argPi(i32* %param) #0 + +; Function Attrs: noinline optnone uwtable +declare void @_Z4funcv() #0 + +; Function Attrs: noinline nounwind uwtable +define dso_local void @_Z19test_multiple_exitsi(i32 %param) local_unnamed_addr #2 { +; CHECK-LABEL: @_Z19test_multiple_exitsi( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TEMP:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[TAILRECURSE:%.*]] +; CHECK: tailrecurse: +; CHECK-NEXT: [[PARAM_TR:%.*]] = phi i32 [ [[PARAM:%.*]], [[ENTRY:%.*]] ], [ [[ADD6:%.*]], [[IF_THEN5:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[PARAM_TR]], 10 +; CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TEMP]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[TMP1]]) #1 +; CHECK-NEXT: call void @_Z11capture_argPi(i32* nonnull [[TEMP]]) +; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[PARAM_TR]], 1 +; CHECK-NEXT: call void @_Z19test_multiple_exitsi(i32 [[ADD]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[TMP1]]) #1 +; CHECK-NEXT: br label [[IF_END14:%.*]] +; CHECK: if.else: +; CHECK-NEXT: [[PARAM_OFF:%.*]] = add i32 [[PARAM_TR]], -10 +; CHECK-NEXT:
[[TMP2:%.*]] = icmp ult i32 [[PARAM_OFF]], 10 +; CHECK-NEXT: br i1 [[TMP2]], label [[IF_THEN5]], label [[IF_ELSE7:%.*]] +; CHECK: if.then5: +; CHECK-NEXT: [[ADD6]] = add nuw nsw i32 [[PARAM_TR]], 1 +; CHECK-NEXT: br label [[TAILRECURSE]] +; CHECK: if.else7: +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[PARAM_TR]], -2 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 20 +; CHECK-NEXT: br i1 [[TMP4]], label [[IF_THEN11:%.*]], label [[IF_END14]] +; CHECK: if.then11: +; CHECK-NEXT: [[ADD12:%.*]] = add nsw i32 [[PARAM_TR]], 1 +; CHECK-NEXT: tail call void @_Z19test_multiple_exitsi(i32 [[ADD12]]) +; CHECK-NEXT: tail call void @_Z4funcv() +; CHECK-NEXT: ret void +; CHECK: if.end14: +; CHECK-NEXT: ret void +; +entry: + %temp = alloca i32, align 4 + %0 = icmp ult i32 %param, 10 + br i1 %0, label %if.then, label %if.else + +if.then: ; preds = %entry + %1 = bitcast i32* %temp to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #2 + call void @_Z11capture_argPi(i32* nonnull %temp) + %add = add nuw nsw i32 %param, 1 + call void @_Z19test_multiple_exitsi(i32 %add) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #2 + br label %if.end14 + +if.else: ; preds = %entry + %param.off = add i32 %param, -10 + %2 = icmp ult i32 %param.off, 10 + br i1 %2, label %if.then5, label %if.else7 + +if.then5: ; preds = %if.else + %add6 = add nuw nsw i32 %param, 1 + call void @_Z19test_multiple_exitsi(i32 %add6) + br label %if.end14 + +if.else7: ; preds = %if.else + %3 = and i32 %param, -2 + %4 = icmp eq i32 %3, 20 + br i1 %4, label %if.then11, label %if.end14 + +if.then11: ; preds = %if.else7 + %add12 = add nsw i32 %param, 1 + call void @_Z19test_multiple_exitsi(i32 %add12) + call void @_Z4funcv() + br label %if.end14 + +if.end14: ; preds = %if.then5, %if.then11, %if.else7, %if.then + ret void +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2 + +; Function Attrs: argmemonly nounwind willreturn +declare 
void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2 + +attributes #0 = { nofree noinline norecurse nounwind uwtable } +attributes #1 = { nounwind uwtable } +attributes #2 = { argmemonly nounwind willreturn } diff --git a/llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll b/llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s + +; IR for that test was generated from the following C++ source: +; +;int count; +;__attribute__((noinline)) void globalIncrement(const int* param) { count += *param; } +; +;void test(int recurseCount) +;{ +; if (recurseCount == 0) return; +; int temp = 10; +; globalIncrement(&temp); +; test(recurseCount - 1); +;} +; + +@count = dso_local local_unnamed_addr global i32 0, align 4 + +; Function Attrs: nofree noinline norecurse nounwind uwtable +declare void @_Z15globalIncrementPKi(i32* nocapture readonly %param) #0 + +; Test that TRE could be done for recursive tail routine containing +; call to function receiving a pointer to local stack. 
+ +; Function Attrs: nounwind uwtable +define dso_local void @_Z4testi(i32 %recurseCount) local_unnamed_addr #1 { +; CHECK-LABEL: @_Z4testi( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TEMP:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[TAILRECURSE:%.*]] +; CHECK: tailrecurse: +; CHECK-NEXT: [[RECURSECOUNT_TR:%.*]] = phi i32 [ [[RECURSECOUNT:%.*]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[IF_END:%.*]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[RECURSECOUNT_TR]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[TEMP]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[TMP0]]) +; CHECK-NEXT: store i32 10, i32* [[TEMP]], align 4 +; CHECK-NEXT: call void @_Z15globalIncrementPKi(i32* nonnull [[TEMP]]) +; CHECK-NEXT: [[SUB]] = add nsw i32 [[RECURSECOUNT_TR]], -1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[TMP0]]) +; CHECK-NEXT: br label [[TAILRECURSE]] +; CHECK: return: +; CHECK-NEXT: ret void +; +entry: + %temp = alloca i32, align 4 + %cmp = icmp eq i32 %recurseCount, 0 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %0 = bitcast i32* %temp to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #6 + store i32 10, i32* %temp, align 4 + call void @_Z15globalIncrementPKi(i32* nonnull %temp) + %sub = add nsw i32 %recurseCount, -1 + call void @_Z4testi(i32 %sub) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #6 + br label %return + +return: ; preds = %entry, %if.end + ret void +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2 + +attributes #0 = { nofree noinline norecurse nounwind uwtable } +attributes #1 = { nounwind uwtable } +attributes #2 = { argmemonly nounwind willreturn }