Index: llvm/bindings/go/llvm/transforms_scalar.go =================================================================== --- llvm/bindings/go/llvm/transforms_scalar.go +++ llvm/bindings/go/llvm/transforms_scalar.go @@ -39,6 +39,7 @@ C.LLVMAddScalarReplAggregatesPassWithThreshold(pm.C, C.int(threshold)) } func (pm PassManager) AddSimplifyLibCallsPass() { C.LLVMAddSimplifyLibCallsPass(pm.C) } +func (pm PassManager) AddTailCallMarkingPass() { C.LLVMAddTailCallMarkingPass(pm.C) } func (pm PassManager) AddTailCallEliminationPass() { C.LLVMAddTailCallEliminationPass(pm.C) } func (pm PassManager) AddDemoteMemoryToRegisterPass() { C.LLVMAddDemoteMemoryToRegisterPass(pm.C) } func (pm PassManager) AddVerifierPass() { C.LLVMAddVerifierPass(pm.C) } Index: llvm/bindings/ocaml/transforms/scalar_opts/llvm_scalar_opts.mli =================================================================== --- llvm/bindings/ocaml/transforms/scalar_opts/llvm_scalar_opts.mli +++ llvm/bindings/ocaml/transforms/scalar_opts/llvm_scalar_opts.mli @@ -156,6 +156,11 @@ : [< Llvm.PassManager.any ] Llvm.PassManager.t -> unit = "llvm_add_simplify_lib_calls" +(** See the [llvm::createTailCallMarkingPass] function. *) +external add_tail_call_marking + : [< Llvm.PassManager.any ] Llvm.PassManager.t -> unit + = "llvm_add_tail_call_marking" + (** See the [llvm::createTailCallEliminationPass] function. *) external add_tail_call_elimination : [< Llvm.PassManager.any ] Llvm.PassManager.t -> unit Index: llvm/bindings/ocaml/transforms/scalar_opts/scalar_opts_ocaml.c =================================================================== --- llvm/bindings/ocaml/transforms/scalar_opts/scalar_opts_ocaml.c +++ llvm/bindings/ocaml/transforms/scalar_opts/scalar_opts_ocaml.c @@ -200,6 +200,12 @@ return Val_unit; } +/* [ unit */ +value llvm_add_tail_call_marking(LLVMPassManagerRef PM) { + LLVMAddTailCallMarkingPass(PM); + return Val_unit; +} + /* [ unit */ value llvm_add_demote_memory_to_register(LLVMPassManagerRef PM) { LLVMAddDemoteMemoryToRegisterPass(PM); Index: llvm/examples/OrcV2Examples/LLJITWithOptimizingIRTransform/LLJITWithOptimizingIRTransform.cpp =================================================================== --- llvm/examples/OrcV2Examples/LLJITWithOptimizingIRTransform/LLJITWithOptimizingIRTransform.cpp +++ llvm/examples/OrcV2Examples/LLJITWithOptimizingIRTransform/LLJITWithOptimizingIRTransform.cpp @@ -72,6 +72,7 @@ class MyOptimizationTransform { public: MyOptimizationTransform() : PM(std::make_unique()) { + PM->add(createTailCallMarkingPass()); PM->add(createTailCallEliminationPass()); PM->add(createFunctionInliningPass()); PM->add(createIndVarSimplifyPass()); Index: llvm/include/llvm-c/Transforms/Scalar.h =================================================================== --- llvm/include/llvm-c/Transforms/Scalar.h +++ llvm/include/llvm-c/Transforms/Scalar.h @@ -125,6 +125,9 @@ /** See llvm::createSimplifyLibCallsPass function. */ void LLVMAddSimplifyLibCallsPass(LLVMPassManagerRef PM); +/** See llvm::createTailCallMarkingPass function. */ +void LLVMAddTailCallMarkingPass(LLVMPassManagerRef PM); + /** See llvm::createTailCallEliminationPass function. */ void LLVMAddTailCallEliminationPass(LLVMPassManagerRef PM); Index: llvm/include/llvm/InitializePasses.h =================================================================== --- llvm/include/llvm/InitializePasses.h +++ llvm/include/llvm/InitializePasses.h @@ -439,6 +439,7 @@ void initializeStripSymbolsPass(PassRegistry&); void initializeStructurizeCFGLegacyPassPass(PassRegistry &); void initializeTailCallElimPass(PassRegistry&); +void initializeTailCallMarkPass(PassRegistry&); void initializeTailDuplicatePass(PassRegistry&); void initializeTargetLibraryInfoWrapperPassPass(PassRegistry&); void initializeTargetPassConfigPass(PassRegistry&); Index: llvm/include/llvm/LinkAllPasses.h =================================================================== --- llvm/include/llvm/LinkAllPasses.h +++ llvm/include/llvm/LinkAllPasses.h @@ -175,6 +175,7 @@ (void) llvm::createStripNonDebugSymbolsPass(); (void) llvm::createStripDeadDebugInfoPass(); (void) llvm::createStripDeadPrototypesPass(); + (void) llvm::createTailCallMarkingPass(); (void) llvm::createTailCallEliminationPass(); (void) llvm::createJumpThreadingPass(); (void) llvm::createDFAJumpThreadingPass(); Index: llvm/include/llvm/Transforms/Scalar.h =================================================================== --- llvm/include/llvm/Transforms/Scalar.h +++ llvm/include/llvm/Transforms/Scalar.h @@ -286,6 +286,12 @@ /// regions that only contain uniform branches. Pass *createStructurizeCFGPass(bool SkipUniformRegions = false); +//===----------------------------------------------------------------------===// +// +// TailCallMarking - This pass mark call instructions as tail if possible +// +FunctionPass *createTailCallMarkingPass(); + //===----------------------------------------------------------------------===// // // TailCallElimination - This pass eliminates call instructions to the current Index: llvm/include/llvm/Transforms/Scalar/TailCallMarking.h =================================================================== --- /dev/null +++ llvm/include/llvm/Transforms/Scalar/TailCallMarking.h @@ -0,0 +1,28 @@ +//===-- TailCallMarking.h -----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass marks call instruction as "tail". More specifically, if it can +// prove that callees do not access their caller stack frame, they are marked as +// eligible for tail call elimination (by the code generator). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_TAILCALLMARKING_H +#define LLVM_TRANSFORMS_SCALAR_TAILCALLMARKING_H + +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +struct TailCallMarkPass : PassInfoMixin { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_SCALAR_TAILCALLMARKING_H Index: llvm/lib/Passes/PassBuilder.cpp =================================================================== --- llvm/lib/Passes/PassBuilder.cpp +++ llvm/lib/Passes/PassBuilder.cpp @@ -212,6 +212,7 @@ #include "llvm/Transforms/Scalar/SpeculativeExecution.h" #include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h" #include "llvm/Transforms/Scalar/StructurizeCFG.h" +#include "llvm/Transforms/Scalar/TailCallMarking.h" #include "llvm/Transforms/Scalar/TailRecursionElimination.h" #include "llvm/Transforms/Scalar/WarnMissedTransforms.h" #include "llvm/Transforms/Utils/AddDiscriminators.h" Index: llvm/lib/Passes/PassBuilderPipelines.cpp =================================================================== --- llvm/lib/Passes/PassBuilderPipelines.cpp +++ llvm/lib/Passes/PassBuilderPipelines.cpp @@ -112,6 +112,7 @@ #include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Scalar/SpeculativeExecution.h" +#include "llvm/Transforms/Scalar/TailCallMarking.h" #include "llvm/Transforms/Scalar/TailRecursionElimination.h" #include "llvm/Transforms/Scalar/WarnMissedTransforms.h" #include "llvm/Transforms/Utils/AddDiscriminators.h" @@ -437,6 +438,7 @@ !Level.isOptimizingForSize()) FPM.addPass(PGOMemOPSizeOpt()); + FPM.addPass(TailCallMarkPass()); FPM.addPass(TailCallElimPass()); FPM.addPass(SimplifyCFGPass()); @@ -1590,6 +1592,7 @@ // LTO provides additional opportunities for tailcall elimination due to // link-time inlining, and visibility of nocapture attribute. + FPM.addPass(TailCallMarkPass()); FPM.addPass(TailCallElimPass()); // Run a few AA driver optimizations here and now to cleanup the code. Index: llvm/lib/Passes/PassRegistry.def =================================================================== --- llvm/lib/Passes/PassRegistry.def +++ llvm/lib/Passes/PassRegistry.def @@ -350,6 +350,7 @@ FUNCTION_PASS("strip-gc-relocates", StripGCRelocates()) FUNCTION_PASS("structurizecfg", StructurizeCFGPass()) FUNCTION_PASS("tailcallelim", TailCallElimPass()) +FUNCTION_PASS("tailcallmark", TailCallMarkPass()) FUNCTION_PASS("unify-loop-exits", UnifyLoopExitsPass()) FUNCTION_PASS("vector-combine", VectorCombinePass()) FUNCTION_PASS("verify", VerifierPass()) Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -432,10 +432,12 @@ MPM.add(createPGOMemOPSizeOptLegacyPass()); // TODO: Investigate the cost/benefit of tail call elimination on debugging. - if (OptLevel > 1) + if (OptLevel > 1) { + MPM.add(createTailCallMarkingPass()); // Eliminate tail calls MPM.add(createTailCallEliminationPass()); // Eliminate tail calls - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs - MPM.add(createReassociatePass()); // Reassociate expressions + } + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createReassociatePass()); // Reassociate expressions // The matrix extension can introduce large vector operations early, which can // benefit from running vector-combine early on. @@ -1112,8 +1114,10 @@ // LTO provides additional opportunities for tailcall elimination due to // link-time inlining, and visibility of nocapture attribute. - if (OptLevel > 1) + if (OptLevel > 1) { + PM.add(createTailCallMarkingPass()); PM.add(createTailCallEliminationPass()); + } // Infer attributes on declarations, call sites, arguments, etc. PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture. Index: llvm/lib/Transforms/Scalar/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/Scalar/CMakeLists.txt +++ llvm/lib/Transforms/Scalar/CMakeLists.txt @@ -76,6 +76,7 @@ SpeculativeExecution.cpp StraightLineStrengthReduce.cpp StructurizeCFG.cpp + TailCallMarking.cpp TailRecursionElimination.cpp WarnMissedTransforms.cpp Index: llvm/lib/Transforms/Scalar/Scalar.cpp =================================================================== --- llvm/lib/Transforms/Scalar/Scalar.cpp +++ llvm/lib/Transforms/Scalar/Scalar.cpp @@ -103,6 +103,7 @@ initializeStructurizeCFGLegacyPassPass(Registry); initializeSimpleLoopUnswitchLegacyPassPass(Registry); initializeSinkingLegacyPassPass(Registry); + initializeTailCallMarkPass(Registry); initializeTailCallElimPass(Registry); initializeSeparateConstOffsetFromGEPLegacyPassPass(Registry); initializeSpeculativeExecutionLegacyPassPass(Registry); @@ -255,6 +256,10 @@ // NOTE: The simplify-libcalls pass has been removed. } +void LLVMAddTailCallMarkPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createTailCallMarkingPass()); +} + void LLVMAddTailCallEliminationPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createTailCallEliminationPass()); } Index: llvm/lib/Transforms/Scalar/TailCallMarking.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/Scalar/TailCallMarking.cpp @@ -0,0 +1,321 @@ +//===- TailCallMarking.cpp - Mark Tail Calls ------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file mark call instruction as "tail" if possible. +// If it is guaranteed that callees do not access their caller stack frame, +// calls would be marked as "tail". +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/TailCallMarking.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/DomTreeUpdater.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#define DEBUG_TYPE "tailcallmark" + +using namespace llvm; + +STATISTIC(NumMarked, "Number of calls marked as tail"); + +namespace { +struct AllocaDerivedValueTracker { + // Start at a root value and walk its use-def chain to mark calls that use the + // value or a derived value in AllocaUsers, and places where it may escape in + // EscapePoints. + void walk(Value *Root) { + SmallVector Worklist; + SmallPtrSet Visited; + + auto AddUsesToWorklist = [&](Value *V) { + for (auto &U : V->uses()) { + if (!Visited.insert(&U).second) + continue; + Worklist.push_back(&U); + } + }; + + AddUsesToWorklist(Root); + + while (!Worklist.empty()) { + Use *U = Worklist.pop_back_val(); + Instruction *I = cast(U->getUser()); + + switch (I->getOpcode()) { + case Instruction::Call: + case Instruction::Invoke: { + auto &CB = cast(*I); + // If the alloca-derived argument is passed byval it is not an escape + // point, or a use of an alloca. Calling with byval copies the contents + // of the alloca into argument registers or stack slots, which exist + // beyond the lifetime of the current frame. + if (CB.isArgOperand(U) && CB.isByValArgument(CB.getArgOperandNo(U))) + continue; + bool IsNocapture = + CB.isDataOperand(U) && CB.doesNotCapture(CB.getDataOperandNo(U)); + callUsesLocalStack(CB, IsNocapture); + if (IsNocapture) { + // If the alloca-derived argument is passed in as nocapture, then it + // can't propagate to the call's return. That would be capturing. + continue; + } + break; + } + case Instruction::Load: { + // The result of a load is not alloca-derived (unless an alloca has + // otherwise escaped, but this is a local analysis). + continue; + } + case Instruction::Store: { + if (U->getOperandNo() == 0) + EscapePoints.insert(I); + continue; // Stores have no users to analyze. + } + case Instruction::BitCast: + case Instruction::GetElementPtr: + case Instruction::PHI: + case Instruction::Select: + case Instruction::AddrSpaceCast: + break; + default: + EscapePoints.insert(I); + break; + } + + AddUsesToWorklist(I); + } + } + + void callUsesLocalStack(CallBase &CB, bool IsNocapture) { + // Add it to the list of alloca users. + AllocaUsers.insert(&CB); + + // If it's nocapture then it can't capture this alloca. + if (IsNocapture) + return; + + // If it can write to memory, it can leak the alloca value. + if (!CB.onlyReadsMemory()) + EscapePoints.insert(&CB); + } + + SmallPtrSet AllocaUsers; + SmallPtrSet EscapePoints; +}; +} // namespace + +static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) { + if (F.callsFunctionThatReturnsTwice()) + return false; + + // The local stack holds all alloca instructions and all byval arguments. + AllocaDerivedValueTracker Tracker; + for (Argument &Arg : F.args()) { + if (Arg.hasByValAttr()) + Tracker.walk(&Arg); + } + for (auto &BB : F) { + for (auto &I : BB) + if (AllocaInst *AI = dyn_cast(&I)) + Tracker.walk(AI); + } + + bool Modified = false; + + // Track whether a block is reachable after an alloca has escaped. Blocks that + // contain the escaping instruction will be marked as being visited without an + // escaped alloca, since that is how the block began. + enum VisitType { UNVISITED, UNESCAPED, ESCAPED }; + DenseMap Visited; + + // We propagate the fact that an alloca has escaped from block to successor. + // Visit the blocks that are propagating the escapedness first. To do this, we + // maintain two worklists. + SmallVector WorklistUnescaped, WorklistEscaped; + + // We may enter a block and visit it thinking that no alloca has escaped yet, + // then see an escape point and go back around a loop edge and come back to + // the same block twice. Because of this, we defer setting tail on calls when + // we first encounter them in a block. Every entry in this list does not + // statically use an alloca via use-def chain analysis, but may find an alloca + // through other means if the block turns out to be reachable after an escape + // point. + SmallVector DeferredTails; + + BasicBlock *BB = &F.getEntryBlock(); + VisitType Escaped = UNESCAPED; + do { + for (auto &I : *BB) { + if (Tracker.EscapePoints.count(&I)) + Escaped = ESCAPED; + + CallInst *CI = dyn_cast(&I); + // A PseudoProbeInst has the IntrInaccessibleMemOnly tag hence it is + // considered accessing memory and will be marked as a tail call if we + // don't bail out here. + if (!CI || CI->isTailCall() || isa(&I) || + isa(&I)) + continue; + + // Special-case operand bundle "clang.arc.attachedcall". + bool IsNoTail = + CI->isNoTailCall() || CI->hasOperandBundlesOtherThan( + LLVMContext::OB_clang_arc_attachedcall); + + if (!IsNoTail && CI->doesNotAccessMemory()) { + // A call to a readnone function whose arguments are all things computed + // outside this function can be marked tail. Even if you stored the + // alloca address into a global, a readnone function can't load the + // global anyhow. + // + // Note that this runs whether we know an alloca has escaped or not. If + // it has, then we can't trust Tracker.AllocaUsers to be accurate. + bool SafeToTail = true; + for (auto &Arg : CI->args()) { + if (isa(Arg.getUser())) + continue; + if (Argument *A = dyn_cast(Arg.getUser())) + if (!A->hasByValAttr()) + continue; + SafeToTail = false; + break; + } + if (SafeToTail) { + using namespace ore; + ORE->emit([&]() { + return OptimizationRemark(DEBUG_TYPE, "tailcall-readnone", CI) + << "marked as tail call candidate (readnone)"; + }); + CI->setTailCall(); + NumMarked++; + Modified = true; + continue; + } + } + + if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) + DeferredTails.push_back(CI); + } + + for (auto *SuccBB : successors(BB)) { + auto &State = Visited[SuccBB]; + if (State < Escaped) { + State = Escaped; + if (State == ESCAPED) + WorklistEscaped.push_back(SuccBB); + else + WorklistUnescaped.push_back(SuccBB); + } + } + + if (!WorklistEscaped.empty()) { + BB = WorklistEscaped.pop_back_val(); + Escaped = ESCAPED; + } else { + BB = nullptr; + while (!WorklistUnescaped.empty()) { + auto *NextBB = WorklistUnescaped.pop_back_val(); + if (Visited[NextBB] == UNESCAPED) { + BB = NextBB; + Escaped = UNESCAPED; + break; + } + } + } + } while (BB); + + for (CallInst *CI : DeferredTails) { + if (Visited[CI->getParent()] != ESCAPED) { + // If the escape point was part way through the block, calls after the + // escape point wouldn't have been put into DeferredTails. + LLVM_DEBUG(dbgs() << "Marked as tail call candidate: " << *CI << "\n"); + CI->setTailCall(); + NumMarked++; + Modified = true; + } + } + + return Modified; +} + +namespace { +struct TailCallMark : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + TailCallMark() : FunctionPass(ID) { + initializeTailCallMarkPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.setPreservesCFG(); + AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); + } + + bool runOnFunction(Function &F) override { + if (skipFunction(F)) + return false; + return markTails( + F, &getAnalysis().getORE()); + } +}; +} // namespace + +char TailCallMark::ID = 0; +INITIALIZE_PASS_BEGIN(TailCallMark, "tailcallmark", "Tail Call Marking", false, + false) +INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) +INITIALIZE_PASS_END(TailCallMark, "tailcallmark", "Tail Call Marking", false, + false) + +FunctionPass *llvm::createTailCallMarkingPass() { return new TailCallMark(); } +PreservedAnalyses TailCallMarkPass::run(Function &F, + FunctionAnalysisManager &AM) { + + auto &ORE = AM.getResult(F); + bool Changed = markTails(F, &ORE); + if (!Changed) + return PreservedAnalyses::all(); + + PreservedAnalyses PA; + PA.preserveSet(); + PA.preserve(); + return PA; +} Index: llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp =================================================================== --- llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -191,141 +191,7 @@ }; } -static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) { - if (F.callsFunctionThatReturnsTwice()) - return false; - - // The local stack holds all alloca instructions and all byval arguments. - AllocaDerivedValueTracker Tracker; - for (Argument &Arg : F.args()) { - if (Arg.hasByValAttr()) - Tracker.walk(&Arg); - } - for (auto &BB : F) { - for (auto &I : BB) - if (AllocaInst *AI = dyn_cast(&I)) - Tracker.walk(AI); - } - bool Modified = false; - - // Track whether a block is reachable after an alloca has escaped. Blocks that - // contain the escaping instruction will be marked as being visited without an - // escaped alloca, since that is how the block began. - enum VisitType { - UNVISITED, - UNESCAPED, - ESCAPED - }; - DenseMap Visited; - - // We propagate the fact that an alloca has escaped from block to successor. - // Visit the blocks that are propagating the escapedness first. To do this, we - // maintain two worklists. - SmallVector WorklistUnescaped, WorklistEscaped; - - // We may enter a block and visit it thinking that no alloca has escaped yet, - // then see an escape point and go back around a loop edge and come back to - // the same block twice. Because of this, we defer setting tail on calls when - // we first encounter them in a block. Every entry in this list does not - // statically use an alloca via use-def chain analysis, but may find an alloca - // through other means if the block turns out to be reachable after an escape - // point. - SmallVector DeferredTails; - - BasicBlock *BB = &F.getEntryBlock(); - VisitType Escaped = UNESCAPED; - do { - for (auto &I : *BB) { - if (Tracker.EscapePoints.count(&I)) - Escaped = ESCAPED; - - CallInst *CI = dyn_cast(&I); - // A PseudoProbeInst has the IntrInaccessibleMemOnly tag hence it is - // considered accessing memory and will be marked as a tail call if we - // don't bail out here. - if (!CI || CI->isTailCall() || isa(&I) || - isa(&I)) - continue; - - // Special-case operand bundle "clang.arc.attachedcall". - bool IsNoTail = - CI->isNoTailCall() || CI->hasOperandBundlesOtherThan( - LLVMContext::OB_clang_arc_attachedcall); - - if (!IsNoTail && CI->doesNotAccessMemory()) { - // A call to a readnone function whose arguments are all things computed - // outside this function can be marked tail. Even if you stored the - // alloca address into a global, a readnone function can't load the - // global anyhow. - // - // Note that this runs whether we know an alloca has escaped or not. If - // it has, then we can't trust Tracker.AllocaUsers to be accurate. - bool SafeToTail = true; - for (auto &Arg : CI->args()) { - if (isa(Arg.getUser())) - continue; - if (Argument *A = dyn_cast(Arg.getUser())) - if (!A->hasByValAttr()) - continue; - SafeToTail = false; - break; - } - if (SafeToTail) { - using namespace ore; - ORE->emit([&]() { - return OptimizationRemark(DEBUG_TYPE, "tailcall-readnone", CI) - << "marked as tail call candidate (readnone)"; - }); - CI->setTailCall(); - Modified = true; - continue; - } - } - - if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) - DeferredTails.push_back(CI); - } - - for (auto *SuccBB : successors(BB)) { - auto &State = Visited[SuccBB]; - if (State < Escaped) { - State = Escaped; - if (State == ESCAPED) - WorklistEscaped.push_back(SuccBB); - else - WorklistUnescaped.push_back(SuccBB); - } - } - - if (!WorklistEscaped.empty()) { - BB = WorklistEscaped.pop_back_val(); - Escaped = ESCAPED; - } else { - BB = nullptr; - while (!WorklistUnescaped.empty()) { - auto *NextBB = WorklistUnescaped.pop_back_val(); - if (Visited[NextBB] == UNESCAPED) { - BB = NextBB; - Escaped = UNESCAPED; - break; - } - } - } - } while (BB); - - for (CallInst *CI : DeferredTails) { - if (Visited[CI->getParent()] != ESCAPED) { - // If the escape point was part way through the block, calls after the - // escape point wouldn't have been put into DeferredTails. - LLVM_DEBUG(dbgs() << "Marked as tail call candidate: " << *CI << "\n"); - CI->setTailCall(); - Modified = true; - } - } - - return Modified; -} /// Return true if it is safe to move the specified /// instruction from after the call to before the call, assuming that all @@ -850,7 +716,6 @@ return false; bool MadeChange = false; - MadeChange |= markTails(F, ORE); // If this function is a varargs function, we won't be able to PHI the args // right, so don't even try to convert it... Index: llvm/test/Other/new-pm-defaults.ll =================================================================== --- llvm/test/Other/new-pm-defaults.ll +++ llvm/test/Other/new-pm-defaults.ll @@ -156,6 +156,7 @@ ; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass +; CHECK-O23SZ-NEXT: Running pass: TailCallMarkPass ; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ReassociatePass Index: llvm/test/Other/new-pm-lto-defaults.ll =================================================================== --- llvm/test/Other/new-pm-lto-defaults.ll +++ llvm/test/Other/new-pm-lto-defaults.ll @@ -81,6 +81,7 @@ ; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass ; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis ; CHECK-O23SZ-NEXT: Running pass: SROAPass on foo +; CHECK-O23SZ-NEXT: Running pass: TailCallMarkPass on foo ; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass on foo ; CHECK-O23SZ-NEXT: Running pass: PostOrderFunctionAttrsPass on (foo) ; CHECK-O23SZ-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA Index: llvm/test/Other/new-pm-thinlto-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-defaults.ll +++ llvm/test/Other/new-pm-thinlto-defaults.ll @@ -122,6 +122,7 @@ ; CHECK-O1-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass +; CHECK-O23SZ-NEXT: Running pass: TailCallMarkPass ; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ReassociatePass Index: llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -96,6 +96,7 @@ ; CHECK-O1-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass +; CHECK-O23SZ-NEXT: Running pass: TailCallMarkPass ; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ReassociatePass Index: llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -105,6 +105,7 @@ ; CHECK-O1-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass +; CHECK-O23SZ-NEXT: Running pass: TailCallMarkPass ; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ReassociatePass Index: llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -134,6 +134,7 @@ ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O2-NEXT: Running pass: PGOMemOPSizeOpt ; CHECK-O3-NEXT: Running pass: PGOMemOPSizeOpt +; CHECK-O23SZ-NEXT: Running pass: TailCallMarkPass ; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ReassociatePass Index: llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll =================================================================== --- llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -100,6 +100,7 @@ ; CHECK-O1-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O2-NEXT: Running pass: LibCallsShrinkWrapPass ; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass +; CHECK-O23SZ-NEXT: Running pass: TailCallMarkPass ; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ReassociatePass Index: llvm/test/Transforms/Inline/byval-tail-call.ll =================================================================== --- llvm/test/Transforms/Inline/byval-tail-call.ll +++ llvm/test/Transforms/Inline/byval-tail-call.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -basic-aa -tailcallelim -inline -instcombine -dse -S | FileCheck %s -; RUN: opt < %s -aa-pipeline=basic-aa -passes='function(tailcallelim),cgscc(inline,function(instcombine,dse))' -S | FileCheck %s +; RUN: opt < %s -basic-aa -tailcallmark -tailcallelim -inline -instcombine -dse -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes='function(tailcallmark,tailcallelim),cgscc(inline,function(instcombine,dse))' -S | FileCheck %s ; PR7272 ; Calls that capture byval parameters cannot be marked as tail calls. Other Index: llvm/test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll =================================================================== --- llvm/test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll +++ llvm/test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s +; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s ; PR7328 ; PR7506 define i32 @test1_constants(i32 %x) { Index: llvm/test/Transforms/TailCallElim/EraseBB.ll =================================================================== --- llvm/test/Transforms/TailCallElim/EraseBB.ll +++ llvm/test/Transforms/TailCallElim/EraseBB.ll @@ -1,4 +1,4 @@ -; RUN: opt -passes=tailcallelim -verify-dom-info -S < %s 2>&1 | FileCheck %s +; RUN: opt -passes=tailcallmark,tailcallelim -verify-dom-info -S < %s 2>&1 | FileCheck %s ; CHECK: add nsw i32 ; CHECK-NEXT: br label Index: llvm/test/Transforms/TailCallElim/accum_recursion.ll =================================================================== --- llvm/test/Transforms/TailCallElim/accum_recursion.ll +++ llvm/test/Transforms/TailCallElim/accum_recursion.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s +; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s define i32 @test1_factorial(i32 %x) { entry: Index: llvm/test/Transforms/TailCallElim/ackermann.ll =================================================================== --- llvm/test/Transforms/TailCallElim/ackermann.ll +++ llvm/test/Transforms/TailCallElim/ackermann.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts ; This function contains two tail calls, which should be eliminated -; RUN: opt < %s -passes=tailcallelim -verify-dom-info -stats -disable-output 2>&1 | grep "2 tailcallelim" +; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -stats -disable-output 2>&1 | grep "2 tailcallelim" define i32 @Ack(i32 %M.1, i32 %N.1) { entry: Index: llvm/test/Transforms/TailCallElim/basic.ll =================================================================== --- llvm/test/Transforms/TailCallElim/basic.ll +++ llvm/test/Transforms/TailCallElim/basic.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s +; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s declare void @noarg() declare void @use(i32*) Index: llvm/test/Transforms/TailCallElim/debugloc.ll =================================================================== --- llvm/test/Transforms/TailCallElim/debugloc.ll +++ llvm/test/Transforms/TailCallElim/debugloc.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=debugify,tailcallelim -S | FileCheck %s +; RUN: opt < %s -passes=debugify,tailcallmark,tailcallelim -S | FileCheck %s define void @foo() { entry: Index: llvm/test/Transforms/TailCallElim/deopt-bundle.ll =================================================================== --- llvm/test/Transforms/TailCallElim/deopt-bundle.ll +++ llvm/test/Transforms/TailCallElim/deopt-bundle.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s +; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s define i32 @f_1(i32 %x) { ; CHECK-LABEL: @f_1( Index: llvm/test/Transforms/TailCallElim/dont_reorder_load.ll =================================================================== --- llvm/test/Transforms/TailCallElim/dont_reorder_load.ll +++ llvm/test/Transforms/TailCallElim/dont_reorder_load.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | grep call | count 4 +; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | grep call | count 4 ; PR4323 ; Several cases where tail call elimination should not move the load above the Index: llvm/test/Transforms/TailCallElim/dup_tail.ll =================================================================== --- llvm/test/Transforms/TailCallElim/dup_tail.ll +++ llvm/test/Transforms/TailCallElim/dup_tail.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts ; Duplicate the return into if.end to enable TCE. -; RUN: opt -passes=tailcallelim -verify-dom-info -stats -disable-output < %s 2>&1 | FileCheck %s +; RUN: opt -passes=tailcallmark,tailcallelim -verify-dom-info -stats -disable-output < %s 2>&1 | FileCheck %s ; CHECK: Number of return duplicated Index: llvm/test/Transforms/TailCallElim/inf-recursion.ll =================================================================== --- llvm/test/Transforms/TailCallElim/inf-recursion.ll +++ llvm/test/Transforms/TailCallElim/inf-recursion.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s +; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s ; Don't turn this into an infinite loop, this is probably the implementation ; of fabs and we expect the codegen to lower fabs. Index: llvm/test/Transforms/TailCallElim/notail.ll =================================================================== --- llvm/test/Transforms/TailCallElim/notail.ll +++ llvm/test/Transforms/TailCallElim/notail.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s +; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s ; CHECK: tail call void @callee0() ; CHECK: notail call void @callee1() Index: llvm/test/Transforms/TailCallElim/opt-remarks-recursion.ll =================================================================== --- llvm/test/Transforms/TailCallElim/opt-remarks-recursion.ll +++ llvm/test/Transforms/TailCallElim/opt-remarks-recursion.ll @@ -1,5 +1,5 @@ -; RUN: opt %s -tailcallelim -verify-dom-info -pass-remarks=tailcallelim -o /dev/null 2>&1 | FileCheck %s -; RUN: opt %s -o /dev/null -passes='require,tailcallelim' -pass-remarks=tailcallelim 2>&1 | FileCheck %s +; RUN: opt %s -tailcallmark -tailcallelim -verify-dom-info -pass-remarks=tailcallelim -o /dev/null 2>&1 | FileCheck %s +; RUN: opt %s -o /dev/null -passes='require,tailcallmark,tailcallelim' -pass-remarks=tailcallelim 2>&1 | FileCheck %s ; CHECK: /home/davide/pat.c:2:20: transforming tail recursion into loop define i32 @fib(i32 %n) nounwind ssp { Index: llvm/test/Transforms/TailCallElim/reorder_load.ll =================================================================== --- llvm/test/Transforms/TailCallElim/reorder_load.ll +++ llvm/test/Transforms/TailCallElim/reorder_load.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s +; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s ; PR4323 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: llvm/test/Transforms/TailCallElim/setjmp.ll =================================================================== --- llvm/test/Transforms/TailCallElim/setjmp.ll +++ llvm/test/Transforms/TailCallElim/setjmp.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s +; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s ; Test that we don't tail call in a functions that calls returns_twice ; functions. Index: llvm/test/Transforms/TailCallElim/tre-byval-parameter-2.ll =================================================================== --- llvm/test/Transforms/TailCallElim/tre-byval-parameter-2.ll +++ llvm/test/Transforms/TailCallElim/tre-byval-parameter-2.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s +; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s ; the test was generated from the following C++ source: ; Index: llvm/test/Transforms/TailCallElim/tre-byval-parameter.ll =================================================================== --- llvm/test/Transforms/TailCallElim/tre-byval-parameter.ll +++ llvm/test/Transforms/TailCallElim/tre-byval-parameter.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s +; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s ; the test was generated from the following C++ source: ; Index: llvm/test/Transforms/TailCallElim/tre-multiple-exits.ll =================================================================== --- llvm/test/Transforms/TailCallElim/tre-multiple-exits.ll +++ llvm/test/Transforms/TailCallElim/tre-multiple-exits.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s +; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s ; This test checks that TRE would be done for only one recursive call. ; The test_multiple_exits function has three recursive calls. Index: llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll =================================================================== --- llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll +++ llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s +; RUN: opt < %s -passes=tailcallmark,tailcallelim -verify-dom-info -S | FileCheck %s ; IR for that test was generated from the following C++ source: ; Index: polly/lib/CodeGen/CodegenCleanup.cpp =================================================================== --- polly/lib/CodeGen/CodegenCleanup.cpp +++ polly/lib/CodeGen/CodegenCleanup.cpp @@ -73,6 +73,7 @@ FPM->add(createCFGSimplificationPass()); FPM->add(createInstructionCombiningPass(true)); FPM->add(createLibCallsShrinkWrapPass()); + FPM->add(createTailCallMarkingPass()); FPM->add(createTailCallEliminationPass()); FPM->add(createCFGSimplificationPass()); FPM->add(createReassociatePass()); Index: polly/lib/Transform/Canonicalization.cpp =================================================================== --- polly/lib/Transform/Canonicalization.cpp +++ polly/lib/Transform/Canonicalization.cpp @@ -27,6 +27,7 @@ #include "llvm/Transforms/Scalar/LoopRotation.h" #include "llvm/Transforms/Scalar/Reassociate.h" #include "llvm/Transforms/Scalar/SimplifyCFG.h" +#include "llvm/Transforms/Scalar/TailCallMarking.h" #include "llvm/Transforms/Scalar/TailRecursionElimination.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/Mem2Reg.h" @@ -45,6 +46,7 @@ PM.add(llvm::createEarlyCSEPass(UseMemSSA)); PM.add(llvm::createInstructionCombiningPass()); PM.add(llvm::createCFGSimplificationPass()); + PM.add(llvm::createTailCallMarkingPass()); PM.add(llvm::createTailCallEliminationPass()); PM.add(llvm::createCFGSimplificationPass()); PM.add(llvm::createReassociatePass()); @@ -100,6 +102,7 @@ FPM.addPass(EarlyCSEPass(UseMemSSA)); FPM.addPass(InstCombinePass()); FPM.addPass(SimplifyCFGPass()); + FPM.addPass(TailCallMarkPass()); FPM.addPass(TailCallElimPass()); FPM.addPass(SimplifyCFGPass()); FPM.addPass(ReassociatePass());