Index: llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp =================================================================== --- llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -358,8 +358,11 @@ /// Return true if the specified value is the same when the return would exit /// as it was when the initial iteration of the recursive function was executed. /// -/// We currently handle static constants and arguments that are not modified as -/// part of the recursion. +/// We currently handle +/// * static constants +/// * arguments that are not modified as part of the recursion +/// * returns only reachable from one case of a switch on a value +/// * in readonly functions, GEPs and loads where all operands are dyn const static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) { if (isa(V)) return true; // Static constants are always dyn consts @@ -387,7 +390,24 @@ if (SI->getCondition() == V) return SI->getDefaultDest() != RI->getParent(); - // Not a constant or immutable argument, we can't safely transform. + // In readonly functions, GEPs and loads where all operands are + // dynamic constant are also dynamic constant + if (CI->onlyReadsMemory()) { + if (isa(V) || isa(V)) { + Instruction *I = cast(V); + + bool OpsAreDynConsts = true; + for (Value *Op : I->operand_values()) { + OpsAreDynConsts = isDynamicConstant(Op, CI, RI); + if (!OpsAreDynConsts) + break; + } + if (OpsAreDynConsts) + return true; + } + } + + // we can't safely transform. return false; } @@ -497,6 +517,27 @@ return CI; } +static void moveInstAndDepsToEntry(Instruction *I, Function *F) { + // This is only safe if the instruction is dynamic constant, + // which should alway be true if we are here. + + Instruction *Front = &F->getEntryBlock().front(); + I->moveBefore(Front); + + for (Value *Op : I->operand_values()) { + if (Instruction *OpI = dyn_cast(Op)) { + if (I == OpI) { + // This can happen if the instruction had an operand that was a + // dynamic constant argument and we replaced it with a PHI that + // selects itself + continue; + } + + moveInstAndDepsToEntry(OpI, F); + } + } +} + static bool eliminateRecursiveTailCall( CallInst *CI, ReturnInst *Ret, BasicBlock *&OldEntry, bool &TailCallsAreMarkedTail, SmallVectorImpl &ArgumentPHIs, @@ -642,10 +683,19 @@ // it will not show up as a predecessor. for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; - if (P == &F->getEntryBlock()) - AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P); - else + if (P == &F->getEntryBlock()) { + Value *AccRecElimInitVal = AccumulatorRecursionEliminationInitVal; + // If the initial value is an instruction, then we need to move it and + // its dependencies to the entry so it is available to the PHI. If we + // have gotten here then we know that the instruction is dynamic + // constant and thus is safe to move. + if (Instruction *I = dyn_cast(AccRecElimInitVal)) + moveInstAndDepsToEntry(I, F); + + AccPN->addIncoming(AccRecElimInitVal, P); + } else { AccPN->addIncoming(AccPN, P); + } } if (AccRecInstr) { Index: llvm/test/Transforms/TailCallElim/accum_recursion.ll =================================================================== --- llvm/test/Transforms/TailCallElim/accum_recursion.ll +++ llvm/test/Transforms/TailCallElim/accum_recursion.ll @@ -73,3 +73,33 @@ ret i64 %n ; CHECK: ret i64 %accumulator.tr } + +%struct.Relative = type { i32, [5 x i8] } + +define i32 @test4_readonly(%struct.Relative* %relativePtr, i32 %index) nounwind readonly { +; CHECK-LABEL: @test4_readonly( +entry: +; CHECK: %0 = getelementptr inbounds %struct.Relative, %struct.Relative* %relativePtr, i64 0, i32 0 +; CHECK: %1 = load i32, i32* %0, align 4 +; CHECK: tailrecurse: +; CHECK: %accumulator.tr = phi i32 [ %1, %entry ] +; CHECK: %index.tr = phi i32 [ %index, %entry ] + %0 = icmp eq i32 %index, 0 + br i1 %0, label %then, label %else + +then: + %1 = getelementptr inbounds %struct.Relative, %struct.Relative* %relativePtr, i64 0, i32 0 + %2 = load i32, i32* %1, align 4 + ret i32 %2 + +else: + %3 = add i32 %index, -1 + %4 = zext i32 %3 to i64 + %5 = getelementptr inbounds %struct.Relative, %struct.Relative* %relativePtr, i64 0, i32 1, i64 %4 + %6 = load i8, i8* %5, align 1 + %7 = zext i8 %6 to i32 + %8 = call i32 @test4_readonly(%struct.Relative* %relativePtr, i32 %3) +; CHECK-NOT: call i32 + %9 = add i32 %8, %7 + ret i32 %9 +}