Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -63,6 +63,7 @@ void initializeTarget(PassRegistry&); void initializeAAEvalLegacyPassPass(PassRegistry&); +void initializeACDCELegacyPassPass(PassRegistry&); void initializeAddDiscriminatorsPass(PassRegistry&); void initializeADCELegacyPassPass(PassRegistry&); void initializeBDCELegacyPassPass(PassRegistry &); Index: include/llvm/Transforms/Scalar.h =================================================================== --- include/llvm/Transforms/Scalar.h +++ include/llvm/Transforms/Scalar.h @@ -75,15 +75,6 @@ //===----------------------------------------------------------------------===// // -// AggressiveDCE - This pass uses the SSA based Aggressive DCE algorithm. This -// algorithm assumes instructions are dead until proven otherwise, which makes -// it more successful are removing non-obviously dead instructions. -// -FunctionPass *createAggressiveDCEPass(); - - -//===----------------------------------------------------------------------===// -// // GuardWidening - An optimization over the @llvm.experimental.guard intrinsic // that (optimistically) combines multiple guards into one to have fewer checks // at runtime. @@ -93,6 +84,15 @@ //===----------------------------------------------------------------------===// // +// AggressiveControlDCE - This pass uses the SSA based Aggressive DCE algorithm. +// This algorithm assumes instructions are dead until proven otherwise, which +// makes it more successful at removing non-obviously dead instructions. This +// variant does not treat branches as automatically live +// +FunctionPass *createAggressiveDCEPass(bool RetainControlFlow = true); + +//===----------------------------------------------------------------------===// +// // BitTrackingDCE - This pass uses a bit-tracking DCE algorithm in order to // remove computations of dead bits. // Index: include/llvm/Transforms/Scalar/ADCE.h =================================================================== --- include/llvm/Transforms/Scalar/ADCE.h +++ include/llvm/Transforms/Scalar/ADCE.h @@ -1,4 +1,4 @@ -//===- ADCE.h - Aggressive dead code elimination --------------------------===// +//===- ACDCE.h - Aggressive control dead code elimination -----------------===// // // The LLVM Compiler Infrastructure // @@ -7,30 +7,35 @@ // //===----------------------------------------------------------------------===// // -// This file provides the interface for the Aggressive Dead Code Elimination -// pass. This pass optimistically assumes that all instructions are dead until -// proven otherwise, allowing it to eliminate dead computations that other DCE -// passes do not catch, particularly involving loop computations. +// This file provides the interface for the Aggressive Dead Code +// Elimination pass. This pass optimistically assumes that all instructions are +// dead until proven otherwise, allowing it to eliminate dead computations that +// other DCE passes do not catch, particularly involving loop computations. // //===----------------------------------------------------------------------===// -#ifndef LLVM_TRANSFORMS_SCALAR_ADCE_H -#define LLVM_TRANSFORMS_SCALAR_ADCE_H +#ifndef LLVM_TRANSFORMS_SCALAR_ACDCE_H +#define LLVM_TRANSFORMS_SCALAR_ACDCE_H #include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" namespace llvm { -/// A DCE pass that assumes instructions are dead until proven otherwise. +/// A DCE pass that assumes instructions including branches are dead until +/// proven otherwise. /// /// This pass eliminates dead code by optimistically assuming that all /// instructions are dead until proven otherwise. This allows it to eliminate /// dead computations that other DCE passes do not catch, particularly involving -/// loop computations. +/// loop computations. This pass augments the predecessor "Aggressive DCE" by +/// not treating all branch operations as automatically live and by altering the +/// control flow graph remove dead branching. struct ADCEPass : PassInfoMixin { - PreservedAnalyses run(Function &F); + bool RetainControlFlow; + ADCEPass(bool RetainControlFlow = true); + PreservedAnalyses run(Function &F, AnalysisManager &AM); }; } -#endif // LLVM_TRANSFORMS_SCALAR_ADCE_H +#endif // LLVM_TRANSFORMS_SCALAR_ACDCE_H Index: lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- lib/Transforms/IPO/PassManagerBuilder.cpp +++ lib/Transforms/IPO/PassManagerBuilder.cpp @@ -12,7 +12,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm-c/Transforms/PassManagerBuilder.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/BasicAliasAnalysis.h" @@ -33,6 +32,7 @@ #include "llvm/Transforms/IPO/ForceFunctionAttrs.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/IPO/InferFunctionAttrs.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/GVN.h" @@ -41,55 +41,52 @@ using namespace llvm; static cl::opt -RunLoopVectorization("vectorize-loops", cl::Hidden, - cl::desc("Run the Loop vectorization passes")); + RunLoopVectorization("vectorize-loops", cl::Hidden, + cl::desc("Run the Loop vectorization passes")); static cl::opt -RunSLPVectorization("vectorize-slp", cl::Hidden, - cl::desc("Run the SLP vectorization passes")); + RunSLPVectorization("vectorize-slp", cl::Hidden, + cl::desc("Run the SLP vectorization passes")); static cl::opt -RunBBVectorization("vectorize-slp-aggressive", cl::Hidden, - cl::desc("Run the BB vectorization passes")); + RunBBVectorization("vectorize-slp-aggressive", cl::Hidden, + cl::desc("Run the BB vectorization passes")); -static cl::opt -UseGVNAfterVectorization("use-gvn-after-vectorization", - cl::init(false), cl::Hidden, - cl::desc("Run GVN instead of Early CSE after vectorization passes")); +static cl::opt UseGVNAfterVectorization( + "use-gvn-after-vectorization", cl::init(false), cl::Hidden, + cl::desc("Run GVN instead of Early CSE after vectorization passes")); static cl::opt ExtraVectorizerPasses( "extra-vectorizer-passes", cl::init(false), cl::Hidden, cl::desc("Run cleanup optimization passes after vectorization.")); -static cl::opt UseNewSROA("use-new-sroa", - cl::init(true), cl::Hidden, - cl::desc("Enable the new, experimental SROA pass")); - static cl::opt -RunLoopRerolling("reroll-loops", cl::Hidden, - cl::desc("Run the loop rerolling pass")); + UseNewSROA("use-new-sroa", cl::init(true), cl::Hidden, + cl::desc("Enable the new, experimental SROA pass")); + +static cl::opt RunLoopRerolling("reroll-loops", cl::Hidden, + cl::desc("Run the loop rerolling pass")); static cl::opt -RunFloat2Int("float-to-int", cl::Hidden, cl::init(true), - cl::desc("Run the float2int (float demotion) pass")); + RunFloat2Int("float-to-int", cl::Hidden, cl::init(true), + cl::desc("Run the float2int (float demotion) pass")); static cl::opt RunLoadCombine("combine-loads", cl::init(false), cl::Hidden, cl::desc("Run the load combining pass")); -static cl::opt -RunSLPAfterLoopVectorization("run-slp-after-loop-vectorization", - cl::init(true), cl::Hidden, - cl::desc("Run the SLP vectorizer (and BB vectorizer) after the Loop " - "vectorizer instead of before")); +static cl::opt RunSLPAfterLoopVectorization( + "run-slp-after-loop-vectorization", cl::init(true), cl::Hidden, + cl::desc("Run the SLP vectorizer (and BB vectorizer) after the Loop " + "vectorizer instead of before")); -static cl::opt UseCFLAA("use-cfl-aa", - cl::init(false), cl::Hidden, - cl::desc("Enable the new, experimental CFL alias analysis")); +static cl::opt + UseCFLAA("use-cfl-aa", cl::init(false), cl::Hidden, + cl::desc("Enable the new, experimental CFL alias analysis")); static cl::opt -EnableMLSM("mlsm", cl::init(true), cl::Hidden, - cl::desc("Enable motion of merged load and store")); + EnableMLSM("mlsm", cl::init(true), cl::Hidden, + cl::desc("Enable motion of merged load and store")); static cl::opt EnableLoopInterchange( "enable-loopinterchange", cl::init(false), cl::Hidden, @@ -100,9 +97,9 @@ cl::desc( "Enable the GlobalsModRef AliasAnalysis outside of the LTO pipeline.")); -static cl::opt EnableLoopLoadElim( - "enable-loop-load-elim", cl::init(true), cl::Hidden, - cl::desc("Enable the LoopLoadElimination Pass")); +static cl::opt + EnableLoopLoadElim("enable-loop-load-elim", cl::init(true), cl::Hidden, + cl::desc("Enable the LoopLoadElimination Pass")); static cl::opt RunPGOInstrGen( "profile-generate", cl::init(""), cl::Hidden, @@ -119,27 +116,27 @@ cl::desc("Enable the experimental Loop Versioning LICM pass")); PassManagerBuilder::PassManagerBuilder() { - OptLevel = 2; - SizeLevel = 0; - LibraryInfo = nullptr; - Inliner = nullptr; - ModuleSummary = nullptr; - DisableUnitAtATime = false; - DisableUnrollLoops = false; - BBVectorize = RunBBVectorization; - SLPVectorize = RunSLPVectorization; - LoopVectorize = RunLoopVectorization; - RerollLoops = RunLoopRerolling; - LoadCombine = RunLoadCombine; - DisableGVNLoadPRE = false; - VerifyInput = false; - VerifyOutput = false; - MergeFunctions = false; - PrepareForLTO = false; - PGOInstrGen = RunPGOInstrGen; - PGOInstrUse = RunPGOInstrUse; - PrepareForThinLTO = false; - PerformThinLTO = false; + OptLevel = 2; + SizeLevel = 0; + LibraryInfo = nullptr; + Inliner = nullptr; + ModuleSummary = nullptr; + DisableUnitAtATime = false; + DisableUnrollLoops = false; + BBVectorize = RunBBVectorization; + SLPVectorize = RunSLPVectorization; + LoopVectorize = RunLoopVectorization; + RerollLoops = RunLoopRerolling; + LoadCombine = RunLoadCombine; + DisableGVNLoadPRE = false; + VerifyInput = false; + VerifyOutput = false; + MergeFunctions = false; + PrepareForLTO = false; + PGOInstrGen = RunPGOInstrGen; + PGOInstrUse = RunPGOInstrUse; + PrepareForThinLTO = false; + PerformThinLTO = false; } PassManagerBuilder::~PassManagerBuilder() { @@ -149,7 +146,9 @@ /// Set of global extensions, automatically added as part of the standard set. static ManagedStatic, 8> > GlobalExtensions; + PassManagerBuilder::ExtensionFn>, + 8>> + GlobalExtensions; void PassManagerBuilder::addGlobalExtension( PassManagerBuilder::ExtensionPointTy Ty, @@ -196,7 +195,8 @@ if (LibraryInfo) FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); - if (OptLevel == 0) return; + if (OptLevel == 0) + return; addInitialAliasAnalysisPasses(FPM); @@ -229,56 +229,56 @@ MPM.add(createSROAPass()); else MPM.add(createScalarReplAggregatesPass(-1, false)); - MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies // Speculative execution if the target has divergent branches; otherwise nop. MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); - MPM.add(createJumpThreadingPass()); // Thread jumps. + MPM.add(createJumpThreadingPass()); // Thread jumps. MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs // Combine silly seq's addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); MPM.add(createTailCallEliminationPass()); // Eliminate tail calls - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs - MPM.add(createReassociatePass()); // Reassociate expressions + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createReassociatePass()); // Reassociate expressions // Rotate Loop - disable header duplication at -Oz MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); - MPM.add(createLICMPass()); // Hoist loop invariants + MPM.add(createLICMPass()); // Hoist loop invariants MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3)); MPM.add(createCFGSimplificationPass()); addInstructionCombiningPass(MPM); - MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars - MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. - MPM.add(createLoopDeletionPass()); // Delete dead loops + MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. + MPM.add(createLoopDeletionPass()); // Delete dead loops if (EnableLoopInterchange) { MPM.add(createLoopInterchangePass()); // Interchange loops MPM.add(createCFGSimplificationPass()); } if (!DisableUnrollLoops) - MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops + MPM.add(createSimpleLoopUnrollPass()); // Unroll small loops addExtensionsToPM(EP_LoopOptimizerEnd, MPM); if (OptLevel > 1) { if (EnableMLSM) MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds - MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies + MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies } - MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset - MPM.add(createSCCPPass()); // Constant prop with SCCP + MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset + MPM.add(createSCCPPass()); // Constant prop with SCCP // Delete dead bit computations (instcombine runs after to fold away the dead // computations, and then ADCE will run later to exploit any new DCE // opportunities that creates). - MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations + MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations // Run instcombine after redundancy elimination to exploit opportunities // opened up by them. addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); - MPM.add(createJumpThreadingPass()); // Thread jumps + MPM.add(createJumpThreadingPass()); // Thread jumps MPM.add(createCorrelatedValuePropagationPass()); - MPM.add(createDeadStoreEliminationPass()); // Delete dead stores + MPM.add(createDeadStoreEliminationPass()); // Delete dead stores MPM.add(createLICMPass()); addExtensionsToPM(EP_ScalarOptimizerLate, MPM); @@ -287,7 +287,7 @@ MPM.add(createLoopRerollPass()); if (!RunSLPAfterLoopVectorization) { if (SLPVectorize) - MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. if (BBVectorize) { MPM.add(createBBVectorizePass()); @@ -296,7 +296,7 @@ if (OptLevel > 1 && UseGVNAfterVectorization) MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies else - MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies // BBVectorize may have significantly shortened a loop body; unroll again. if (!DisableUnrollLoops) @@ -307,7 +307,10 @@ if (LoadCombine) MPM.add(createLoadCombinePass()); - MPM.add(createAggressiveDCEPass()); // Delete dead instructions + bool RetainControlFlow = (OptLevel <= 2); + MPM.add( + createAggressiveDCEPass(RetainControlFlow)); // Delete dead instructions + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs // Clean up after everything. addInstructionCombiningPass(MPM); @@ -437,8 +440,8 @@ // size. By placing it just after inlining other optimizations which runs // later might get benefit of no-alias assumption in clone loop. if (UseLoopVersioningLICM) { - MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM - MPM.add(createLICMPass()); // Hoist loop invariants + MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM + MPM.add(createLICMPass()); // Hoist loop invariants } if (EnableNonLTOGlobalsModRef) @@ -506,7 +509,7 @@ if (RunSLPAfterLoopVectorization) { if (SLPVectorize) { - MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. if (OptLevel > 1 && ExtraVectorizerPasses) { MPM.add(createEarlyCSEPass()); } @@ -519,7 +522,7 @@ if (OptLevel > 1 && UseGVNAfterVectorization) MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies else - MPM.add(createEarlyCSEPass()); // Catch trivial redundancies + MPM.add(createEarlyCSEPass()); // Catch trivial redundancies // BBVectorize may have significantly shortened a loop body; unroll again. if (!DisableUnrollLoops) @@ -532,7 +535,7 @@ addInstructionCombiningPass(MPM); if (!DisableUnrollLoops) { - MPM.add(createLoopUnrollPass()); // Unroll small loops + MPM.add(createLoopUnrollPass()); // Unroll small loops // LoopUnroll may generate some redundency to cleanup. addInstructionCombiningPass(MPM); @@ -558,8 +561,8 @@ // GlobalOpt already deletes dead functions and globals, at -O2 try a // late pass of GlobalDCE. It is capable of deleting dead cycles. if (OptLevel > 1) { - MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. - MPM.add(createConstantMergePass()); // Merge dup global constants + MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. + MPM.add(createConstantMergePass()); // Merge dup global constants } } @@ -637,7 +640,7 @@ Inliner = nullptr; } - PM.add(createPruneEHPass()); // Remove dead EH info. + PM.add(createPruneEHPass()); // Remove dead EH info. // Optimize globals again if we ran the inliner. if (RunInliner) @@ -661,13 +664,13 @@ // Run a few AA driven optimizations here and now, to cleanup the code. PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture. - PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. + PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. - PM.add(createLICMPass()); // Hoist loop invariants. + PM.add(createLICMPass()); // Hoist loop invariants. if (EnableMLSM) PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds. - PM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies. - PM.add(createMemCpyOptPass()); // Remove dead memcpys. + PM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies. + PM.add(createMemCpyOptPass()); // Remove dead memcpys. // Nuke dead stores. PM.add(createDeadStoreEliminationPass()); @@ -679,7 +682,7 @@ PM.add(createLoopInterchangePass()); if (!DisableUnrollLoops) - PM.add(createSimpleLoopUnrollPass()); // Unroll small loops + PM.add(createSimpleLoopUnrollPass()); // Unroll small loops PM.add(createLoopVectorizePass(true, LoopVectorize)); // The vectorizer may have significantly shortened a loop body; unroll again. if (!DisableUnrollLoops) @@ -688,10 +691,10 @@ // Now that we've optimized loops (in particular loop induction variables), // we may have exposed more scalar opportunities. Run parts of the scalar // optimizer again at this point. - addInstructionCombiningPass(PM); // Initial cleanup + addInstructionCombiningPass(PM); // Initial cleanup PM.add(createCFGSimplificationPass()); // if-convert - PM.add(createSCCPPass()); // Propagate exposed constants - addInstructionCombiningPass(PM); // Clean up again + PM.add(createSCCPPass()); // Propagate exposed constants + addInstructionCombiningPass(PM); // Clean up again PM.add(createBitTrackingDCEPass()); // More scalar chains could be vectorized due to more alias information @@ -774,7 +777,7 @@ } inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) { - return reinterpret_cast(P); + return reinterpret_cast(P); } inline LLVMPassManagerBuilderRef wrap(PassManagerBuilder *P) { @@ -791,58 +794,50 @@ delete Builder; } -void -LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB, - unsigned OptLevel) { +void LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB, + unsigned OptLevel) { PassManagerBuilder *Builder = unwrap(PMB); Builder->OptLevel = OptLevel; } -void -LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB, - unsigned SizeLevel) { +void LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB, + unsigned SizeLevel) { PassManagerBuilder *Builder = unwrap(PMB); Builder->SizeLevel = SizeLevel; } -void -LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB, - LLVMBool Value) { +void LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB, + LLVMBool Value) { PassManagerBuilder *Builder = unwrap(PMB); Builder->DisableUnitAtATime = Value; } -void -LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB, - LLVMBool Value) { +void LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB, + LLVMBool Value) { PassManagerBuilder *Builder = unwrap(PMB); Builder->DisableUnrollLoops = Value; } -void -LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB, - LLVMBool Value) { +void LLVMPassManagerBuilderSetDisableSimplifyLibCalls( + LLVMPassManagerBuilderRef PMB, LLVMBool Value) { // NOTE: The simplify-libcalls pass has been removed. } -void -LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB, - unsigned Threshold) { +void LLVMPassManagerBuilderUseInlinerWithThreshold( + LLVMPassManagerBuilderRef PMB, unsigned Threshold) { PassManagerBuilder *Builder = unwrap(PMB); Builder->Inliner = createFunctionInliningPass(Threshold); } -void -LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB, - LLVMPassManagerRef PM) { +void LLVMPassManagerBuilderPopulateFunctionPassManager( + LLVMPassManagerBuilderRef PMB, LLVMPassManagerRef PM) { PassManagerBuilder *Builder = unwrap(PMB); legacy::FunctionPassManager *FPM = unwrap(PM); Builder->populateFunctionPassManager(*FPM); } -void -LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB, - LLVMPassManagerRef PM) { +void LLVMPassManagerBuilderPopulateModulePassManager( + LLVMPassManagerBuilderRef PMB, LLVMPassManagerRef PM) { PassManagerBuilder *Builder = unwrap(PMB); legacy::PassManagerBase *MPM = unwrap(PM); Builder->populateModulePassManager(*MPM); Index: lib/Transforms/Scalar/ADCE.cpp =================================================================== --- lib/Transforms/Scalar/ADCE.cpp +++ lib/Transforms/Scalar/ADCE.cpp @@ -1,4 +1,4 @@ -//===- ADCE.cpp - Code to perform dead code elimination -------------------===// +//===- ACDCE.cpp - Code to perform dead code elimination ------------------===// // // The LLVM Compiler Infrastructure // @@ -10,16 +10,22 @@ // This file implements the Aggressive Dead Code Elimination pass. This pass // optimistically assumes that all instructions are dead until proven otherwise, // allowing it to eliminate dead computations that other DCE passes do not -// catch, particularly involving loop computations. +// catch by not assuming that control operations are live. Under option, it will +// also remove may-be-infinite loops (legal in C++, not generally in Java) // //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/ADCE.h" -#include "llvm/ADT/DepthFirstIterator.h" + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CFGPrinter.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/IteratedDominanceFrontier.h" +#include "llvm/Analysis/PostDominators.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -28,15 +34,528 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Transforms/Scalar.h" + using namespace llvm; #define DEBUG_TYPE "adce" +#define DEBUG_MSG(x) DEBUG(dbgs() << x << '\n';) + +static cl::opt RemoveControlFlow("adce-remove-control-flow", + cl::init(false), cl::Hidden); +static cl::opt RemoveLoops("adce-remove-loops", cl::init(false), + cl::Hidden); + +STATISTIC(NumRemoved, "Number of non-branch instructions removed"); +STATISTIC(NumBranchesRemoved, "Number of branch instructions removed"); + +namespace llvm { + +// Specialize po_iterator_storage to identify loop bottoms +// which we define simply as back edges in a depth first +// traversal of the CFG starting at the entry. +class PO_WithLoops; + +template <> class po_iterator_storage { + PO_WithLoops &Storage; + +public: + po_iterator_storage(PO_WithLoops &Storage) : Storage(Storage) {} + // These functions are defined below. + bool insertEdge(BasicBlock *From, BasicBlock *To); + void finishPostorder(BasicBlock *BB); +}; + +class PO_WithLoops { + typedef SmallPtrSet LoopBottomsType; + typedef po_iterator POTIterator; + + Function &F; + // Visited[BB] is true while the basic block *BB is on + // the stack of being visited nodes and false after + // all children have been visited + DenseMap Visited; + LoopBottomsType LoopBottoms; + +public: + PO_WithLoops(Function &F) : F(F) { Visited.grow(2 * F.size()); } + POTIterator begin() { return po_ext_begin(&F.getEntryBlock(), *this); } + POTIterator end() { return po_ext_end(&F.getEntryBlock(), *this); } + bool insertEdge(BasicBlock *From, BasicBlock *To) { + auto P = Visited.insert(std::make_pair(To, true)); + if (P.second) { + return true; + } + if (P.first->second) { + // back edge to an ancestor in DFS + LoopBottoms.insert(From); + } + return false; + } + + void finishPostorder(BasicBlock *BB) { Visited[BB] = false; } + bool reachable(BasicBlock *BB) const { + return (Visited.find(BB) != Visited.end()); + } + const LoopBottomsType &getLoopBottoms() { return LoopBottoms; } +}; + +bool po_iterator_storage::insertEdge(BasicBlock *From, + BasicBlock *To) { + return Storage.insertEdge(From, To); +} +void po_iterator_storage::finishPostorder(BasicBlock *BB) { + Storage.finishPostorder(BB); +} +} + +namespace { + +bool isUnconditional(TerminatorInst *Term) { + auto BR = dyn_cast(Term); + return BR && BR->isUnconditional(); +} + +struct BlockInfoType; +/// A Dead Region is a set of dead basic blocks with a common +/// post-dominating live block +struct DeadRegionInfo { + /// true when all blocks have unconditional branches + bool Trivial = true; // dead blocks are just unconditional branches + /// the set of dead blocks in this region + SmallVector DeadBlocks; + /// sources of edges from live nodes into dead ones of this region + SmallPtrSet LivePreds; +}; + +/// Information about Instruction's +struct InstInfoType { + + bool Live = false; ///< true if this unstruction is live + + /// quick access to information for block containing associated Instruction + struct BlockInfoType *Block = nullptr; +}; + +/// Information about basic blocks relevant to dead code elimination +struct BlockInfoType { + /// true when this block contains a live instructions + bool Live = false; + /// this block ends in an unconditional branch + bool UnconditionalBranch = false; + + /// quick access to the LiveInfo for the terminator + InstInfoType *TerminatorLiveInfo = nullptr; // == & InstInfo[Terminator] + + bool terminatorIsLive() const { return TerminatorLiveInfo->Live; } + + /// corresponding BasicBlock + BasicBlock *BB = nullptr; -STATISTIC(NumRemoved, "Number of instructions removed"); + /// points to this BB when this block is live, otherwise the most immediate + /// live post-dominator of BB + BasicBlock *LivePostDom = nullptr; -static void collectLiveScopes(const DILocalScope &LS, - SmallPtrSetImpl &AliveScopes) { + TerminatorInst *Terminator = nullptr; ///< cache of BB->getTerminator() + + /// block found to not reach a function return point + bool isUnreachable() const { return Terminator == nullptr; } + + /// the collection of dead blocks which have this block as + /// the most-immediate live post-dominator + DeadRegionInfo DeadRegion; + + /// used to control exploration of dead connected components + unsigned RegionVisited = 0; +}; + +/// This class implements analysis to determine instructions including branching +/// which does not effect the output. Such operations are removed as are +/// unreachable operations for some scenarions. Dead loops are optionally +/// removed +/// or retained. +class AggressiveDCE { + Function &F; + + /// reverse partial order of blocks with a collection of sources of backward + /// edges referred to as loop bottoms + PO_WithLoops PO; + PostDominatorTree *PDT; // Will be null when we retain control flow + bool GraphWasModified = false; + + /// function-level statistics + unsigned FuncNumBranchesRemoved = 0; + unsigned FuncNumInstRemoved = 0; + + /// one per basic block in the function + std::vector BlockInfoVec; + + /// mapping of blocks to associated information, element in BlockInfoVec + DenseMap BlockInfo; + bool isLive(BasicBlock *BB) { return BlockInfo[BB]->Live; } + + /// mapping of instructions to associated information + DenseMap InstInfo; + bool isLive(Instruction *I) { return InstInfo[I].Live; } + + /// initialize maps and BlockInfoVec + void initialize(); + + /// live instructions to be processed or dead instructions to be deleted + SmallVector Worklist; + SmallPtrSet AliveScopes; + + /// Propagate "live-ness" to definitions which reach "live" instructions + void markLive(); + /// Mark I and its containing block as live. + void markLive(Instruction *I); + void collectLiveScopes(const DILocalScope &LS); + void collectLiveScopes(const DILocation &DL); + + /// true for any instructions which is always treated as live + bool alwaysLive(Instruction &I) const; + + /// vector of blocks with not known to be live terminators, + SmallPtrSet BlocksWithDeadTerminators; + + /// The set of blocks which we have determined are live in the + /// most recent pass + SmallPtrSet NewLiveBlocks; + + /// Analyze basic blocks to find to loop bottoms to keep live and + /// remove unreachable blocks + void analyzeBlocks(); + + /// blocks found to be unreachable when searching for back edges + SmallVector UnreachableBlocks; + void removeUnreachable(BasicBlock *); + + /// Analyze dead branches to find those whose branches are the sources + /// of control dependences impacting a live block. Those branches are + /// marked live. + void checkControlDependences(); + + /// look for phi nodes with complex value which force + /// source blocks of reaching values to be treated as live. + void checkPhiNodes(); + + typedef SmallPtrSet RegionSetType; + + /// Traverse the connected component associated with BBInfo where + /// nodes whose RegionVisited field is greater than FirstVisit are considered + /// already 'visited' and we stop at tops of live blocks. + /// Set RegionVisited for blocks newly visited to CurrentVisit. + /// Record live blocks that are successors of DeadBlocks on this + /// componente in LivePdoms + void visitRegion(BlockInfoType *, unsigned FirstVisit, unsigned CurrentVisit, + RegionSetType *LivePdoms); + + /// Incremented for each new traversal of connected components + unsigned LastCurrentVisit = 0; + + /// Verify that all edges from "dead" predecessors into PN + /// have the same associated value. To refine the precision, the + /// dead blocks are partitioned into connnected components and + /// all definitions associated with predecessor blocks in the same + /// connected component must be the same + void checkPhiNode(PHINode *PN, unsigned FirstVisit); + + /// find groups of dead blocks with common live post-dominator + /// and rewrite the control flow graph + void processDeadRegions(); + + /// Rewrite non-trivial dead regions by redirection incoming live + /// control flow to the post-dominator and updating phi-nnodes in the + /// post-dominator + void updateDeadRegion(BasicBlock *BB, DeadRegionInfo *Info); + + /// adjust BB so it has an unconditional branch to Target + /// mark that branch live. + void makeUnconditional(BasicBlock *BB, BasicBlock *Target); + + /// the collection of unconditional branch instructions created + /// by makeUnconditional + SmallVector NewBranches; + + /// for trivial regions, we just mark all the branches as live to be + /// cleaned up by other phases + void cleanTrivialRegion(const DeadRegionInfo &Info); + + void removeDeadCode(); + +#ifndef NDEBUG + /// the set of live blocks with dead predecessors + SmallPtrSet BlocksForPhiValidation; + + /// verify one value for each predecessor after dead blocks are removed + void validatePhiNodes(BasicBlock *); +#endif + +public: + AggressiveDCE(Function &F, PostDominatorTree *PDT) : F(F), PO(F), PDT(PDT) {} + bool doAggressiveCDCE(); +}; +} + +bool AggressiveDCE::doAggressiveCDCE() { + + initialize(); + markLive(); + processDeadRegions(); + removeDeadCode(); + + return GraphWasModified; +} + +//===----------------------------------------------------------------------===// +// +// Initialize of data structures and identification of +// loops and unreachable code +// +//===----------------------------------------------------------------------===// +void AggressiveDCE::initialize() { + + DEBUG(if (PDT) PDT->print(dbgs())); + + auto NumBlocks = F.size(); + BlockInfoVec.resize(NumBlocks); + BlockInfo.grow(2 * NumBlocks); + auto InfoP = BlockInfoVec.begin(); + size_t NumInsts = 0; + for (auto &BB : F) { + NumInsts += BB.size(); + auto &Info = *InfoP; + BlockInfo[&BB] = &Info; + DEBUG_MSG("initializing block " << BB.getName()); + Info.BB = &BB; + Info.Terminator = BB.getTerminator(); + Info.UnconditionalBranch = isUnconditional(Info.Terminator); + ++InfoP; + } + if (!RemoveLoops) { + // remove unreachable code, identify loop bottoms + // to be forced live below. + analyzeBlocks(); + } + InstInfo.grow(2 * NumInsts); + for (auto &BBInfo : BlockInfoVec) { + if (BBInfo.isUnreachable()) { // removed unreachable node + continue; + } + for (Instruction &I : *BBInfo.BB) { + auto &IInfo = InstInfo[&I]; + IInfo.Block = &BBInfo; + if (alwaysLive(I)) { + markLive(&I); + } + } + } + + // For blocks which do not reach a function return, mark + // them live. Also branches where some reach the return + // and some done as live. + if (PDT) { + for (auto &BBInfo : BlockInfoVec) { + if (!BBInfo.isUnreachable() && !PDT->getNode(BBInfo.BB)) { + markLive(BBInfo.Terminator); + for (auto Pred : predecessors(BBInfo.BB)) { + DEBUG_MSG("no pdom pred live: " << Pred->getName()); + markLive(Pred->getTerminator()); + } + } + } + } + + // mark all loop bottom terminators as live so loops are not + // deleted (in case they are infinite) + for (auto BB : PO.getLoopBottoms()) { + DEBUG_MSG("loop bottom " << BB->getName()); + markLive(BB->getTerminator()); + } + + for (auto &BBInfo : BlockInfoVec) { + BBInfo.TerminatorLiveInfo = &InstInfo[BBInfo.Terminator]; + if (!BBInfo.isUnreachable() && !BBInfo.TerminatorLiveInfo->Live) { + BlocksWithDeadTerminators.insert(BBInfo.BB); + } + } + + // Treat the entry block as always live + auto BB = &F.getEntryBlock(); + auto &EntryInfo = *BlockInfo[BB]; + EntryInfo.Live = true; + EntryInfo.LivePostDom = BB; + if (EntryInfo.UnconditionalBranch) { + EntryInfo.TerminatorLiveInfo->Live = true; + BlocksWithDeadTerminators.erase(BB); + } +} + +// Find and remove unreachable code and compute the bottoms of loops +// which will be forced live so loops are retained. +void AggressiveDCE::analyzeBlocks() { + + // This loop drives a Post Order traversal of the + // control flow graph which will determine the set + // of blocks reachable from the entry and also mark + // those which are the sources of back edges (loop bottoms) + for (auto BB : PO) { + (void)BB; + } + + for (auto &BB : F) { + if (PO.reachable(&BB)) { + continue; + } + DEBUG_MSG("unreachable " << BB.getName()); + removeUnreachable(&BB); + UnreachableBlocks.push_back(&BB); + } + if (Worklist.empty()) { + return; + } + GraphWasModified = true; + for (auto I : Worklist) { + I->eraseFromParent(); + } + Worklist.clear(); +} + +// since we go to the trouble of finding unreachable blocks +// might as well remove them. +void AggressiveDCE::removeUnreachable(BasicBlock *BB) { + auto &Info = *BlockInfo[BB]; + Info.Live = true; + Info.Terminator = nullptr; + + while (PHINode *PN = dyn_cast(BB->begin())) { + PN->replaceAllUsesWith(Constant::getNullValue(PN->getType())); + BB->getInstList().pop_front(); + } + for (auto Succ : successors(BB)) { + Succ->removePredecessor(BB); + } + BB->dropAllReferences(); + for (Instruction &I : *BB) { + Worklist.push_back(&I); + } +} + +//===----------------------------------------------------------------------===// +// +// Core algorithm to propagate liveness +// +//===----------------------------------------------------------------------===// +void AggressiveDCE::markLive() { + + DEBUG_MSG("propagate live\n"); + + // Propagate liveness backwards to operands. + do { + while (!Worklist.empty()) { + Instruction *Curr = Worklist.pop_back_val(); + DEBUG(dbgs() << "work live: "; Curr->dump();); + for (Use &OI : Curr->operands()) { + if (Instruction *Inst = dyn_cast(OI)) { + markLive(Inst); + } + } + } + checkControlDependences(); + if (Worklist.empty()) { + checkPhiNodes(); + } + } while (!Worklist.empty()); + + // Find blocks with dead terminators which are simply + // unconditional branches and mark them live to avoid + // trivial edits + SmallVector NowLiveBlocks; + for (auto BB : BlocksWithDeadTerminators) { + auto Info = BlockInfo[BB]; + if (!Info->Live && Info->UnconditionalBranch && + &Info->BB->front() == Info->Terminator) { + if (auto Pred = Info->BB->getSinglePredecessor()) { + if (isLive(Pred->getTerminator())) { + // this mostly avoids unnecessary work removing trival dead regions + DEBUG_MSG("unconditional from live pred " << Info->BB->getName()); + Info->Live = true; + Info->TerminatorLiveInfo->Live = true; + Info->LivePostDom = BB; + NowLiveBlocks.push_back(BB); + } + } + } + } + for (auto BB : NowLiveBlocks) { + BlocksWithDeadTerminators.erase(BB); + } +} + +// Check if this instruction is a runtime call for value profiling and +// if it's instrumenting a constant. +static bool isInstrumentsConstant(Instruction &I) { + if (CallInst *CI = dyn_cast(&I)) + if (Function *Callee = CI->getCalledFunction()) + if (Callee->getName().equals(getInstrProfValueProfFuncName())) + if (isa(CI->getArgOperand(0))) + return true; + return false; +} + +bool AggressiveDCE::alwaysLive(Instruction &I) const { + if (I.isEHPad() || I.mayHaveSideEffects()) { + if (isInstrumentsConstant(I)) + return false; + return true; + } + + if (!isa(I)) + return false; + + if (PDT && (isa(I) || isa(I))) + return false; + + return true; +} + +void AggressiveDCE::markLive(Instruction *I) { + auto &Info = InstInfo[I]; + if (Info.Live) { + return; + } + DEBUG(dbgs() << "mark live: "; I->dump()); + Info.Live = true; + Worklist.push_back(I); + + // Collect the live debug info scopes attached to this instruction. + if (const DILocation *DL = I->getDebugLoc()) + collectLiveScopes(*DL); + + auto &BBInfo = *Info.Block; + if (BBInfo.Live) { + return; + } + DEBUG_MSG("mark block live: " << BBInfo.BB->getName()); + BBInfo.Live = true; + NewLiveBlocks.insert(BBInfo.BB); + + BBInfo.LivePostDom = BBInfo.BB; + if (!BBInfo.UnconditionalBranch) { + return; + } + BlocksWithDeadTerminators.erase(BBInfo.BB); + + if (!BBInfo.TerminatorLiveInfo) { // not set in first initialization pass + InstInfo[BBInfo.Terminator].Live = true; + } else { + BBInfo.TerminatorLiveInfo->Live = true; + } +} + +void AggressiveDCE::collectLiveScopes(const DILocalScope &LS) { if (!AliveScopes.insert(&LS).second) return; @@ -44,140 +563,534 @@ return; // Tail-recurse through the scope chain. - collectLiveScopes(cast(*LS.getScope()), AliveScopes); + collectLiveScopes(cast(*LS.getScope())); } -static void collectLiveScopes(const DILocation &DL, - SmallPtrSetImpl &AliveScopes) { +void AggressiveDCE::collectLiveScopes(const DILocation &DL) { // Even though DILocations are not scopes, shove them into AliveScopes so we // don't revisit them. if (!AliveScopes.insert(&DL).second) return; // Collect live scopes from the scope chain. - collectLiveScopes(*DL.getScope(), AliveScopes); + collectLiveScopes(*DL.getScope()); // Tail-recurse through the inlined-at chain. if (const DILocation *IA = DL.getInlinedAt()) - collectLiveScopes(*IA, AliveScopes); + collectLiveScopes(*IA); } -// Check if this instruction is a runtime call for value profiling and -// if it's instrumenting a constant. -static bool isInstrumentsConstant(Instruction &I) { - if (CallInst *CI = dyn_cast(&I)) - if (Function *Callee = CI->getCalledFunction()) - if (Callee->getName().equals(getInstrProfValueProfFuncName())) - if (isa(CI->getArgOperand(0))) - return true; - return false; +// Use the IDFCalculator to find control dependences +// strting at newly discovered live blocks but stopping +// at blocks with live terminators. +void AggressiveDCE::checkControlDependences() { + + if (BlocksWithDeadTerminators.empty()) { + return; + } + + DEBUG({ + dbgs() << "new live blocks:\n"; + for (auto BB : NewLiveBlocks) { + dbgs() << "\t" << BB->getName() << '\n'; + } + dbgs() << "dead terminator blocks:\n"; + for (auto BB : BlocksWithDeadTerminators) { + dbgs() << "\t" << BB->getName() << '\n'; + } + }); + + SmallVector IDFBlocks; + ReverseIDFCalculator IDFs(*PDT); + IDFs.setDefiningBlocks(NewLiveBlocks); + IDFs.setLiveInBlocks(BlocksWithDeadTerminators); + IDFs.calculate(IDFBlocks); + NewLiveBlocks.clear(); + + // Dead terminators which control live blocks become live + for (auto BB : IDFBlocks) { + DEBUG_MSG("live control in: " << BB->getName()); + markLive(BB->getTerminator()); + BlocksWithDeadTerminators.erase(BB); + } } -static bool aggressiveDCE(Function& F) { - SmallPtrSet Alive; - SmallVector Worklist; +// There are scenarios in which a phi node is really short-hand +// for a copy operations on the edge and the semantics of the program +// rely on this. We look for situations where there are distinct values from +// blocks with dead terminators (which we assume are all replaced with +// a single edge) and if we find this case we mark some of those terminators +// as live. As a trivial example, the first branch should be marked as live +// to distinguish which constant value is selected by the phi. - // Collect the set of "root" instructions that are known live. - for (Instruction &I : instructions(F)) { - if (isa(I) || I.isEHPad() || I.mayHaveSideEffects()) { - // Skip any value profile instrumentation calls if they are - // instrumenting constants. - if (isInstrumentsConstant(I)) - continue; - Alive.insert(&I); - Worklist.push_back(&I); +// br i1 %1, label %2, label %3 +// ;