Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -389,6 +389,7 @@ void initializeStripNonLineTableDebugInfoPass(PassRegistry&); void initializeStripSymbolsPass(PassRegistry&); void initializeStructurizeCFGPass(PassRegistry&); +void initializeTailCallMarkPass(PassRegistry &); void initializeTailCallElimPass(PassRegistry&); void initializeTailDuplicatePass(PassRegistry&); void initializeTargetLibraryInfoWrapperPassPass(PassRegistry&); Index: include/llvm/LinkAllPasses.h =================================================================== --- include/llvm/LinkAllPasses.h +++ include/llvm/LinkAllPasses.h @@ -170,6 +170,7 @@ (void) llvm::createStripNonDebugSymbolsPass(); (void) llvm::createStripDeadDebugInfoPass(); (void) llvm::createStripDeadPrototypesPass(); + (void) llvm::createTailCallMarkingPass(); (void) llvm::createTailCallEliminationPass(); (void) llvm::createJumpThreadingPass(); (void) llvm::createUnifyFunctionExitNodesPass(); Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -271,6 +271,12 @@ /// regions that only contain uniform branches. 
Pass *createStructurizeCFGPass(bool SkipUniformRegions = false); +//===----------------------------------------------------------------------===// +// +// TailCallMarking - This pass marks call instructions as tail if possible +// +FunctionPass *createTailCallMarkingPass(); + //===----------------------------------------------------------------------===// // // TailCallElimination - This pass eliminates call instructions to the current Index: include/llvm/Transforms/Scalar/TailCallMarking.h =================================================================== --- /dev/null +++ include/llvm/Transforms/Scalar/TailCallMarking.h @@ -0,0 +1,26 @@ +//===-- TailCallMarking.h -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass marks call instructions as "tail" if possible +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_TAILCALLMARKING_H +#define LLVM_TRANSFORMS_SCALAR_TAILCALLMARKING_H + +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +struct TailCallMarkPass : PassInfoMixin { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_TAILCALLMARKING_H Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -152,6 +152,7 @@ #include "llvm/Transforms/Scalar/Sink.h" #include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h" #include "llvm/Transforms/Scalar/SpeculativeExecution.h" +#include "llvm/Transforms/Scalar/TailCallMarking.h" #include "llvm/Transforms/Scalar/TailRecursionElimination.h"
#include "llvm/Transforms/Scalar/WarnMissedTransforms.h" #include "llvm/Transforms/Utils/AddDiscriminators.h" @@ -410,6 +411,7 @@ !isOptimizingForSize(Level)) FPM.addPass(PGOMemOPSizeOpt()); + FPM.addPass(TailCallMarkPass()); FPM.addPass(TailCallElimPass()); FPM.addPass(SimplifyCFGPass()); @@ -1187,6 +1189,7 @@ // LTO provides additional opportunities for tailcall elimination due to // link-time inlining, and visibility of nocapture attribute. + FPM.addPass(TailCallMarkPass()); FPM.addPass(TailCallElimPass()); // Run a few AA driver optimizations here and now to cleanup the code. Index: lib/Passes/PassRegistry.def =================================================================== --- lib/Passes/PassRegistry.def +++ lib/Passes/PassRegistry.def @@ -224,6 +224,7 @@ FUNCTION_PASS("speculative-execution", SpeculativeExecutionPass()) FUNCTION_PASS("spec-phis", SpeculateAroundPHIsPass()) FUNCTION_PASS("sroa", SROA()) +FUNCTION_PASS("tailcallmark", TailCallMarkPass()) FUNCTION_PASS("tailcallelim", TailCallElimPass()) FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass()) FUNCTION_PASS("verify", VerifierPass()) Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -353,6 +353,7 @@ if (SizeLevel == 0) MPM.add(createPGOMemOPSizeOptLegacyPass()); + MPM.add(createTailCallMarkingPass()); // Mark tail calls MPM.add(createTailCallEliminationPass()); // Eliminate tail calls MPM.add(createCFGSimplificationPass()); // Merge & remove BBs MPM.add(createReassociatePass()); // Reassociate expressions @@ -898,6 +899,7 @@ // LTO provides additional opportunities for tailcall elimination due to // link-time inlining, and visibility of nocapture attribute. + PM.add(createTailCallMarkingPass()); PM.add(createTailCallEliminationPass()); // Run a few AA driven optimizations here and now, to cleanup the code. 
Index: lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- lib/Transforms/Scalar/CMakeLists.txt +++ lib/Transforms/Scalar/CMakeLists.txt @@ -69,6 +69,7 @@ SpeculateAroundPHIs.cpp StraightLineStrengthReduce.cpp StructurizeCFG.cpp + TailCallMarking.cpp TailRecursionElimination.cpp WarnMissedTransforms.cpp Index: lib/Transforms/Scalar/Scalar.cpp =================================================================== --- lib/Transforms/Scalar/Scalar.cpp +++ lib/Transforms/Scalar/Scalar.cpp @@ -95,6 +95,7 @@ initializeStructurizeCFGPass(Registry); initializeSimpleLoopUnswitchLegacyPassPass(Registry); initializeSinkingLegacyPassPass(Registry); + initializeTailCallMarkPass(Registry); initializeTailCallElimPass(Registry); initializeSeparateConstOffsetFromGEPPass(Registry); initializeSpeculativeExecutionLegacyPassPass(Registry); @@ -235,6 +236,10 @@ // NOTE: The simplify-libcalls pass has been removed. } +void LLVMAddTailCallMarkPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createTailCallMarkingPass()); +} + void LLVMAddTailCallEliminationPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createTailCallEliminationPass()); } Index: lib/Transforms/Scalar/TailCallMarking.cpp =================================================================== --- /dev/null +++ lib/Transforms/Scalar/TailCallMarking.cpp @@ -0,0 +1,287 @@ +//===- TailCallMarking.cpp - Mark Tail Calls ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file marks call instructions as "tail" if possible. +// If it is guaranteed that callees do not access their caller's stack frame, +// calls are marked as "tail".
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/TailCallMarking.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +using namespace llvm; + +#define DEBUG_TYPE "tailcallmark" + +STATISTIC(NumMarked, "Number of calls marked as tail"); + +namespace { +struct AllocaDerivedValueTracker { + // Start at a root value and walk its use-def chain to mark calls that use the + // value or a derived value in AllocaUsers, and places where it may escape in + // EscapePoints. + void walk(Value *Root) { + SmallVector Worklist; + SmallPtrSet Visited; + + auto AddUsesToWorklist = [&](Value *V) { + for (auto &U : V->uses()) { + if (!Visited.insert(&U).second) + continue; + Worklist.push_back(&U); + } + }; + + AddUsesToWorklist(Root); + + while (!Worklist.empty()) { + Use *U = Worklist.pop_back_val(); + Instruction *I = cast(U->getUser()); + + switch (I->getOpcode()) { + case Instruction::Call: + case Instruction::Invoke: { + CallSite CS(I); + // If the alloca-derived argument is passed byval it is not an escape + // point, or a use of an alloca. Calling with byval copies the contents + // of the alloca into argument registers or stack slots, which exist + // beyond the lifetime of the current frame. 
+ if (CS.isArgOperand(U) && CS.isByValArgument(CS.getArgumentNo(U))) + continue; + bool IsNocapture = + CS.isDataOperand(U) && CS.doesNotCapture(CS.getDataOperandNo(U)); + callUsesLocalStack(CS, IsNocapture); + if (IsNocapture) { + // If the alloca-derived argument is passed in as nocapture, then it + // can't propagate to the call's return. That would be capturing. + continue; + } + break; + } + case Instruction::Load: { + // The result of a load is not alloca-derived (unless an alloca has + // otherwise escaped, but this is a local analysis). + continue; + } + case Instruction::Store: { + if (U->getOperandNo() == 0) + EscapePoints.insert(I); + continue; // Stores have no users to analyze. + } + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::PHI: + case Instruction::Select: + case Instruction::AddrSpaceCast: + break; + default: + EscapePoints.insert(I); + break; + } + + AddUsesToWorklist(I); + } + } + + void callUsesLocalStack(CallSite CS, bool IsNocapture) { + // Add it to the list of alloca users. + AllocaUsers.insert(CS.getInstruction()); + + // If it's nocapture then it can't capture this alloca. + if (IsNocapture) + return; + + // If it can write to memory, it can leak the alloca value. + if (!CS.onlyReadsMemory()) + EscapePoints.insert(CS.getInstruction()); + } + + SmallPtrSet AllocaUsers; + SmallPtrSet EscapePoints; +}; +} // namespace + +static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) { + if (F.callsFunctionThatReturnsTwice()) + return false; + + // The local stack holds all alloca instructions and all byval arguments. + AllocaDerivedValueTracker Tracker; + for (Argument &Arg : F.args()) { + if (Arg.hasByValAttr()) + Tracker.walk(&Arg); + } + for (auto &BB : F) { + for (auto &I : BB) + if (AllocaInst *AI = dyn_cast(&I)) + Tracker.walk(AI); + } + + bool Modified = false; + + // Track whether a block is reachable after an alloca has escaped. 
Blocks that + // contain the escaping instruction will be marked as being visited without an + // escaped alloca, since that is how the block began. + enum VisitType { UNVISITED, UNESCAPED, ESCAPED }; + DenseMap Visited; + + // We propagate the fact that an alloca has escaped from block to successor. + // Visit the blocks that are propagating the escapedness first. To do this, we + // maintain two worklists. + SmallVector WorklistUnescaped, WorklistEscaped; + + // We may enter a block and visit it thinking that no alloca has escaped yet, + // then see an escape point and go back around a loop edge and come back to + // the same block twice. Because of this, we defer setting tail on calls when + // we first encounter them in a block. Every entry in this list does not + // statically use an alloca via use-def chain analysis, but may find an alloca + // through other means if the block turns out to be reachable after an escape + // point. + SmallVector DeferredTails; + + BasicBlock *BB = &F.getEntryBlock(); + VisitType Escaped = UNESCAPED; + do { + for (auto &I : *BB) { + if (Tracker.EscapePoints.count(&I)) + Escaped = ESCAPED; + + CallInst *CI = dyn_cast(&I); + if (!CI || CI->isTailCall() || isa(&I)) + continue; + + bool IsNoTail = CI->isNoTailCall() || CI->hasOperandBundles(); + + if (!IsNoTail && CI->doesNotAccessMemory()) { + // A call to a readnone function whose arguments are all things computed + // outside this function can be marked tail. Even if you stored the + // alloca address into a global, a readnone function can't load the + // global anyhow. + // + // Note that this runs whether we know an alloca has escaped or not. If + // it has, then we can't trust Tracker.AllocaUsers to be accurate. 
+ bool SafeToTail = true; + for (auto &Arg : CI->arg_operands()) { + if (isa(Arg.getUser())) + continue; + if (Argument *A = dyn_cast(Arg.getUser())) + if (!A->hasByValAttr()) + continue; + SafeToTail = false; + break; + } + if (SafeToTail) { + using namespace ore; + ORE->emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "tailcall-readnone", CI) + << "marked as tail call candidate (readnone)"; + }); + CI->setTailCall(); + NumMarked++; + Modified = true; + continue; + } + } + + if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) { + DeferredTails.push_back(CI); + } + } + + for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) { + auto &State = Visited[SuccBB]; + if (State < Escaped) { + State = Escaped; + if (State == ESCAPED) + WorklistEscaped.push_back(SuccBB); + else + WorklistUnescaped.push_back(SuccBB); + } + } + + if (!WorklistEscaped.empty()) { + BB = WorklistEscaped.pop_back_val(); + Escaped = ESCAPED; + } else { + BB = nullptr; + while (!WorklistUnescaped.empty()) { + auto *NextBB = WorklistUnescaped.pop_back_val(); + if (Visited[NextBB] == UNESCAPED) { + BB = NextBB; + Escaped = UNESCAPED; + break; + } + } + } + } while (BB); + + for (CallInst *CI : DeferredTails) { + if (Visited[CI->getParent()] != ESCAPED) { + // If the escape point was part way through the block, calls after the + // escape point wouldn't have been put into DeferredTails. 
+ LLVM_DEBUG(dbgs() << "Marked as tail call candidate: " << *CI << "\n"); + CI->setTailCall(); + NumMarked++; + Modified = true; + } + } + + return Modified; +} + +namespace { +struct TailCallMark : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + TailCallMark() : FunctionPass(ID) { + initializeTailCallMarkPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + } + + bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + return markTails( + F, &getAnalysis().getORE()); + } +}; +} // namespace + +char TailCallMark::ID = 0; +INITIALIZE_PASS_BEGIN(TailCallMark, "tailcallmark", "Tail Call Marking", false, + false) +INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) +INITIALIZE_PASS_END(TailCallMark, "tailcallmark", "Tail Call Marking", false, + false) + +FunctionPass *llvm::createTailCallMarkingPass() { return new TailCallMark(); } +PreservedAnalyses TailCallMarkPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &ORE = AM.getResult(F); + PreservedAnalyses PA = PreservedAnalyses::allInSet<CFGAnalyses>(); + PA.preserve<GlobalsAA>(); // Marking only edits call flags, never the CFG. + return markTails(F, &ORE) ? PA : PreservedAnalyses::all(); +} Index: lib/Transforms/Scalar/TailRecursionElimination.cpp =================================================================== --- lib/Transforms/Scalar/TailRecursionElimination.cpp +++ lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -50,6 +50,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/TailRecursionElimination.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" @@ -99,228 +100,6 @@ }); } -namespace { -struct AllocaDerivedValueTracker { - // Start at a root value and walk its use-def chain to mark calls that use the - // value or a derived value in AllocaUsers, and places where it
may escape in - // EscapePoints. - void walk(Value *Root) { - SmallVector Worklist; - SmallPtrSet Visited; - - auto AddUsesToWorklist = [&](Value *V) { - for (auto &U : V->uses()) { - if (!Visited.insert(&U).second) - continue; - Worklist.push_back(&U); - } - }; - - AddUsesToWorklist(Root); - - while (!Worklist.empty()) { - Use *U = Worklist.pop_back_val(); - Instruction *I = cast(U->getUser()); - - switch (I->getOpcode()) { - case Instruction::Call: - case Instruction::Invoke: { - CallSite CS(I); - // If the alloca-derived argument is passed byval it is not an escape - // point, or a use of an alloca. Calling with byval copies the contents - // of the alloca into argument registers or stack slots, which exist - // beyond the lifetime of the current frame. - if (CS.isArgOperand(U) && CS.isByValArgument(CS.getArgumentNo(U))) - continue; - bool IsNocapture = - CS.isDataOperand(U) && CS.doesNotCapture(CS.getDataOperandNo(U)); - callUsesLocalStack(CS, IsNocapture); - if (IsNocapture) { - // If the alloca-derived argument is passed in as nocapture, then it - // can't propagate to the call's return. That would be capturing. - continue; - } - break; - } - case Instruction::Load: { - // The result of a load is not alloca-derived (unless an alloca has - // otherwise escaped, but this is a local analysis). - continue; - } - case Instruction::Store: { - if (U->getOperandNo() == 0) - EscapePoints.insert(I); - continue; // Stores have no users to analyze. - } - case Instruction::BitCast: - case Instruction::GetElementPtr: - case Instruction::PHI: - case Instruction::Select: - case Instruction::AddrSpaceCast: - break; - default: - EscapePoints.insert(I); - break; - } - - AddUsesToWorklist(I); - } - } - - void callUsesLocalStack(CallSite CS, bool IsNocapture) { - // Add it to the list of alloca users. - AllocaUsers.insert(CS.getInstruction()); - - // If it's nocapture then it can't capture this alloca. 
- if (IsNocapture) - return; - - // If it can write to memory, it can leak the alloca value. - if (!CS.onlyReadsMemory()) - EscapePoints.insert(CS.getInstruction()); - } - - SmallPtrSet AllocaUsers; - SmallPtrSet EscapePoints; -}; -} - -static bool markTails(Function &F, bool &AllCallsAreTailCalls, - OptimizationRemarkEmitter *ORE) { - if (F.callsFunctionThatReturnsTwice()) - return false; - AllCallsAreTailCalls = true; - - // The local stack holds all alloca instructions and all byval arguments. - AllocaDerivedValueTracker Tracker; - for (Argument &Arg : F.args()) { - if (Arg.hasByValAttr()) - Tracker.walk(&Arg); - } - for (auto &BB : F) { - for (auto &I : BB) - if (AllocaInst *AI = dyn_cast(&I)) - Tracker.walk(AI); - } - - bool Modified = false; - - // Track whether a block is reachable after an alloca has escaped. Blocks that - // contain the escaping instruction will be marked as being visited without an - // escaped alloca, since that is how the block began. - enum VisitType { - UNVISITED, - UNESCAPED, - ESCAPED - }; - DenseMap Visited; - - // We propagate the fact that an alloca has escaped from block to successor. - // Visit the blocks that are propagating the escapedness first. To do this, we - // maintain two worklists. - SmallVector WorklistUnescaped, WorklistEscaped; - - // We may enter a block and visit it thinking that no alloca has escaped yet, - // then see an escape point and go back around a loop edge and come back to - // the same block twice. Because of this, we defer setting tail on calls when - // we first encounter them in a block. Every entry in this list does not - // statically use an alloca via use-def chain analysis, but may find an alloca - // through other means if the block turns out to be reachable after an escape - // point. 
- SmallVector DeferredTails; - - BasicBlock *BB = &F.getEntryBlock(); - VisitType Escaped = UNESCAPED; - do { - for (auto &I : *BB) { - if (Tracker.EscapePoints.count(&I)) - Escaped = ESCAPED; - - CallInst *CI = dyn_cast(&I); - if (!CI || CI->isTailCall() || isa(&I)) - continue; - - bool IsNoTail = CI->isNoTailCall() || CI->hasOperandBundles(); - - if (!IsNoTail && CI->doesNotAccessMemory()) { - // A call to a readnone function whose arguments are all things computed - // outside this function can be marked tail. Even if you stored the - // alloca address into a global, a readnone function can't load the - // global anyhow. - // - // Note that this runs whether we know an alloca has escaped or not. If - // it has, then we can't trust Tracker.AllocaUsers to be accurate. - bool SafeToTail = true; - for (auto &Arg : CI->arg_operands()) { - if (isa(Arg.getUser())) - continue; - if (Argument *A = dyn_cast(Arg.getUser())) - if (!A->hasByValAttr()) - continue; - SafeToTail = false; - break; - } - if (SafeToTail) { - using namespace ore; - ORE->emit([&]() { - return OptimizationRemark(DEBUG_TYPE, "tailcall-readnone", CI) - << "marked as tail call candidate (readnone)"; - }); - CI->setTailCall(); - Modified = true; - continue; - } - } - - if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) { - DeferredTails.push_back(CI); - } else { - AllCallsAreTailCalls = false; - } - } - - for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) { - auto &State = Visited[SuccBB]; - if (State < Escaped) { - State = Escaped; - if (State == ESCAPED) - WorklistEscaped.push_back(SuccBB); - else - WorklistUnescaped.push_back(SuccBB); - } - } - - if (!WorklistEscaped.empty()) { - BB = WorklistEscaped.pop_back_val(); - Escaped = ESCAPED; - } else { - BB = nullptr; - while (!WorklistUnescaped.empty()) { - auto *NextBB = WorklistUnescaped.pop_back_val(); - if (Visited[NextBB] == UNESCAPED) { - BB = NextBB; - Escaped = UNESCAPED; - break; - } - } - } - } while (BB); - - 
for (CallInst *CI : DeferredTails) { - if (Visited[CI->getParent()] != ESCAPED) { - // If the escape point was part way through the block, calls after the - // escape point wouldn't have been put into DeferredTails. - LLVM_DEBUG(dbgs() << "Marked as tail call candidate: " << *CI << "\n"); - CI->setTailCall(); - Modified = true; - } else { - AllCallsAreTailCalls = false; - } - } - - return Modified; -} - /// Return true if it is safe to move the specified /// instruction from after the call to before the call, assuming that all /// instructions between the call and this instruction are movable. @@ -744,7 +523,20 @@ return eliminateRecursiveTailCall(CI, Ret, OldEntry, TailCallsAreMarkedTail, ArgumentPHIs, AA, ORE, DTU); } +/// Return true if every call instruction reachable from the entry block is +/// marked as "tail". Debug intrinsics are skipped so that the presence of +/// debug info cannot change the result. +static bool areAllCallsTailCalls(Function &F) { + for (BasicBlock *BB : depth_first(&F)) { + for (Instruction &I : *BB) { + CallInst *CI = dyn_cast(&I); + if (CI && !CI->isTailCall() && !isa<DbgInfoIntrinsic>(&I)) + return false; + } + } + return true; +} static bool eliminateTailRecursion(Function &F, const TargetTransformInfo *TTI, AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, @@ -753,8 +545,7 @@ return false; bool MadeChange = false; - bool AllCallsAreTailCalls = false; - MadeChange |= markTails(F, AllCallsAreTailCalls, ORE); + bool AllCallsAreTailCalls = areAllCallsTailCalls(F); if (!AllCallsAreTailCalls) return MadeChange; Index: test/Other/new-pm-defaults.ll =================================================================== --- test/Other/new-pm-defaults.ll +++ test/Other/new-pm-defaults.ll @@ -140,6 +140,7 @@ ; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass +; CHECK-O-NEXT: Running pass: TailCallMarkPass ; CHECK-O-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ReassociatePass
Index: test/Other/new-pm-lto-defaults.ll =================================================================== --- test/Other/new-pm-lto-defaults.ll +++ test/Other/new-pm-lto-defaults.ll @@ -81,6 +81,7 @@ ; CHECK-O2-NEXT: Running pass: JumpThreadingPass ; CHECK-O2-NEXT: Running analysis: LazyValueAnalysis ; CHECK-O2-NEXT: Running pass: SROA on foo +; CHECK-O2-NEXT: Running pass: TailCallMarkPass on foo ; CHECK-O2-NEXT: Running pass: TailCallElimPass on foo ; CHECK-O2-NEXT: Finished llvm::Function pass manager run. ; CHECK-O2-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}PostOrderFunctionAttrsPass> Index: test/Other/new-pm-thinlto-defaults.ll =================================================================== --- test/Other/new-pm-thinlto-defaults.ll +++ test/Other/new-pm-thinlto-defaults.ll @@ -120,6 +120,7 @@ ; CHECK-O1-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass +; CHECK-O-NEXT: Running pass: TailCallMarkPass ; CHECK-O-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ReassociatePass Index: test/Transforms/Inline/byval-tail-call.ll =================================================================== --- test/Transforms/Inline/byval-tail-call.ll +++ test/Transforms/Inline/byval-tail-call.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -basicaa -tailcallelim -inline -instcombine -dse -S | FileCheck %s -; RUN: opt < %s -aa-pipeline=basic-aa -passes='function(tailcallelim),cgscc(inline,function(instcombine,dse))' -S | FileCheck %s +; RUN: opt < %s -basicaa -tailcallmark -tailcallelim -inline -instcombine -dse -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes='function(tailcallmark,tailcallelim),cgscc(inline,function(instcombine,dse))' -S | FileCheck %s ; PR7272 ; Calls that capture byval parameters cannot be marked as tail calls. 
Other Index: test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll =================================================================== --- test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll +++ test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s +; RUN: opt < %s -tailcallmark -tailcallelim -verify-dom-info -S | FileCheck %s ; PR7328 ; PR7506 define i32 @foo(i32 %x) { Index: test/Transforms/TailCallElim/accum_recursion.ll =================================================================== --- test/Transforms/TailCallElim/accum_recursion.ll +++ test/Transforms/TailCallElim/accum_recursion.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s -; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s +; RUN: opt < %s -tailcallmark -tailcallelim -verify-dom-info -S | FileCheck %s +; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s define i32 @test1_factorial(i32 %x) { entry: Index: test/Transforms/TailCallElim/basic.ll =================================================================== --- test/Transforms/TailCallElim/basic.ll +++ test/Transforms/TailCallElim/basic.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s +; RUN: opt < %s -tailcallmark -tailcallelim -verify-dom-info -S | FileCheck %s declare void @noarg() declare void @use(i32*) Index: test/Transforms/TailCallElim/debugloc.ll =================================================================== --- test/Transforms/TailCallElim/debugloc.ll +++ test/Transforms/TailCallElim/debugloc.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -debugify -tailcallelim -S | FileCheck %s +; RUN: opt < %s -debugify -tailcallmark -tailcallelim -S | FileCheck %s define void @foo() { entry: Index: test/Transforms/TailCallElim/deopt-bundle.ll =================================================================== --- 
test/Transforms/TailCallElim/deopt-bundle.ll +++ test/Transforms/TailCallElim/deopt-bundle.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s +; RUN: opt < %s -tailcallmark -tailcallelim -verify-dom-info -S | FileCheck %s define i32 @f_1(i32 %x) { ; CHECK-LABEL: @f_1( Index: test/Transforms/TailCallElim/inf-recursion.ll =================================================================== --- test/Transforms/TailCallElim/inf-recursion.ll +++ test/Transforms/TailCallElim/inf-recursion.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s +; RUN: opt < %s -tailcallmark -tailcallelim -verify-dom-info -S | FileCheck %s ; Don't turn this into an infinite loop, this is probably the implementation ; of fabs and we expect the codegen to lower fabs. Index: test/Transforms/TailCallElim/notail.ll =================================================================== --- test/Transforms/TailCallElim/notail.ll +++ test/Transforms/TailCallElim/notail.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s +; RUN: opt < %s -tailcallmark -tailcallelim -verify-dom-info -S | FileCheck %s ; CHECK: tail call void @callee0() ; CHECK: notail call void @callee1() Index: test/Transforms/TailCallElim/opt-remarks-recursion.ll =================================================================== --- test/Transforms/TailCallElim/opt-remarks-recursion.ll +++ test/Transforms/TailCallElim/opt-remarks-recursion.ll @@ -1,5 +1,5 @@ -; RUN: opt %s -tailcallelim -verify-dom-info -pass-remarks=tailcallelim -o /dev/null 2>&1 | FileCheck %s -; RUN: opt %s -o /dev/null -passes='require,tailcallelim' -pass-remarks=tailcallelim 2>&1 | FileCheck %s +; RUN: opt %s -tailcallmark -tailcallelim -verify-dom-info -pass-remarks=tailcallelim -o /dev/null 2>&1 | FileCheck %s +; RUN: opt %s -o /dev/null -passes='require,tailcallmark,tailcallelim' -pass-remarks=tailcallelim 2>&1 | FileCheck %s ; CHECK: /home/davide/pat.c:2:20: 
transforming tail recursion into loop define i32 @fib(i32 %n) nounwind ssp { Index: test/Transforms/TailCallElim/reorder_load.ll =================================================================== --- test/Transforms/TailCallElim/reorder_load.ll +++ test/Transforms/TailCallElim/reorder_load.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s +; RUN: opt < %s -tailcallmark -tailcallelim -verify-dom-info -S | FileCheck %s ; PR4323 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"