Index: include/llvm/Transforms/IPO/FunctionAttrs.h
===================================================================
--- include/llvm/Transforms/IPO/FunctionAttrs.h
+++ include/llvm/Transforms/IPO/FunctionAttrs.h
@@ -18,6 +18,7 @@
 #include "llvm/Analysis/CGSCCPassManager.h"
 #include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/PostDominators.h"
 #include "llvm/IR/PassManager.h"
 
 namespace llvm {
@@ -29,11 +30,7 @@
 /// The three kinds of memory access relevant to 'readonly' and
 /// 'readnone' attributes.
-enum MemoryAccessKind {
-  MAK_ReadNone = 0,
-  MAK_ReadOnly = 1,
-  MAK_MayWrite = 2
-};
+enum MemoryAccessKind { MAK_ReadNone = 0, MAK_ReadOnly = 1, MAK_MayWrite = 2 };
 
 /// Returns the memory access properties of this copy of the function.
 MemoryAccessKind computeFunctionBodyMemoryAccess(Function &F, AAResults &AAR);
Index: lib/Transforms/CMakeLists.txt
===================================================================
--- lib/Transforms/CMakeLists.txt
+++ lib/Transforms/CMakeLists.txt
@@ -5,5 +5,6 @@
 add_subdirectory(IPO)
 add_subdirectory(Vectorize)
 add_subdirectory(Hello)
+add_subdirectory(Devirt)
 add_subdirectory(ObjCARC)
 add_subdirectory(Coroutines)
Index: lib/Transforms/IPO/FunctionAttrs.cpp
===================================================================
--- lib/Transforms/IPO/FunctionAttrs.cpp
+++ lib/Transforms/IPO/FunctionAttrs.cpp
@@ -30,6 +30,7 @@
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/Attributes.h"
@@ -223,8 +224,8 @@
   // Non-exact function definitions may not be selected at link time, and an
   // alternative version that writes to memory may be selected. See the
   // comment on GlobalValue::isDefinitionExact for more details.
-  switch (checkFunctionMemoryAccess(*F, F->hasExactDefinition(),
-                                    AAR, SCCNodes)) {
+  switch (
+      checkFunctionMemoryAccess(*F, F->hasExactDefinition(), AAR, SCCNodes)) {
   case MAK_MayWrite:
     return false;
   case MAK_ReadOnly:
@@ -578,7 +579,7 @@
 /// try to propagate attributes from the callsite's arguments to the parent's
 /// arguments. This may be important because inlining can cause information loss
 /// when attribute knowledge disappears with the inlined call.
-static bool addArgumentAttrsFromCallsites(Function &F) {
+static bool addArgumentAttrsFromCallsites(Function &F, PostDominatorTree &PDT) {
   if (!EnableNonnullArgPropagation)
     return false;
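For illustration only (not part of the diff), this is the kind of caller the next hunk is meant to handle; the helper names sideA, sideB, and use are hypothetical, with use() declared nonnull:

    // Hypothetical C++ source. The call to use() is not in the entry block,
    // but its block post-dominates the entry block, so it executes on every
    // entry into caller() that returns normally; P can then be marked nonnull
    // on caller() itself.
    extern void sideA();
    extern void sideB();
    extern void use(int *P) __attribute__((nonnull));

    void caller(int *P, bool Flag) {
      if (Flag)
        sideA();
      else
        sideB();
      use(P);
    }

The code removed below only scanned the entry block, so a call site like this one was skipped.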
@@ -586,39 +587,44 @@
   // For an argument attribute to transfer from a callsite to the parent, the
   // call must be guaranteed to execute every time the parent is called.
-  // Conservatively, just check for calls in the entry block that are guaranteed
-  // to execute.
-  // TODO: This could be enhanced by testing if the callsite post-dominates the
-  // entry block or by doing simple forward walks or backward walks to the
-  // callsite.
-  BasicBlock &Entry = F.getEntryBlock();
-  for (Instruction &I : Entry) {
-    if (auto CS = CallSite(&I)) {
-      if (auto *CalledFunc = CS.getCalledFunction()) {
-        for (auto &CSArg : CalledFunc->args()) {
-          if (!CSArg.hasNonNullAttr())
-            continue;
-
-          // If the non-null callsite argument operand is an argument to 'F'
-          // (the caller) and the call is guaranteed to execute, then the value
-          // must be non-null throughout 'F'.
-          auto *FArg = dyn_cast<Argument>(CS.getArgOperand(CSArg.getArgNo()));
-          if (FArg && !FArg->hasNonNullAttr()) {
-            FArg->addAttr(Attribute::NonNull);
-            Changed = true;
-          }
-        }
-      }
-    }
-
-    if (!isGuaranteedToTransferExecutionToSuccessor(&I))
-      break;
+  // So we check whether the basic block containing the call site
+  // post-dominates the entry block; if it does, the call is guaranteed to
+  // execute every time the function is entered.
+  const BasicBlock &Entry = F.getEntryBlock();
+  for (BasicBlock &B : F.getBasicBlockList()) {
+    if (PDT.dominates(&B, &Entry)) {
+      for (Instruction &I : B) {
+        if (auto CS = CallSite(&I)) {
+          if (auto *CalledFunc = CS.getCalledFunction()) {
+            for (auto &CSArg : CalledFunc->args()) {
+              if (!CSArg.hasNonNullAttr())
+                continue;
+
+              // If the non-null callsite argument operand is an argument to 'F'
+              // (the caller) and the call is guaranteed to execute, then the
+              // value must be non-null throughout 'F'.
+              auto *FArg =
+                  dyn_cast<Argument>(CS.getArgOperand(CSArg.getArgNo()));
+              if (FArg && !FArg->hasNonNullAttr()) {
+                FArg->addAttr(Attribute::NonNull);
+                Changed = true;
+              }
+            }
+          }
+        }
+
+        if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+          break;
+      }
+    }
   }
 
   return Changed;
 }
 
 /// Deduce nocapture attributes for the SCC.
-static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
+static bool addArgumentAttrs(const SCCNodeSet &SCCNodes,
+                             PostDominatorTree &PDT) {
   bool Changed = false;
   ArgumentGraph AG;
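The heart of the new loop is the post-dominance query. As a rough standalone sketch (the helper name is illustrative, not from the patch), the guard above amounts to:

    // Sketch: true if BB post-dominates its function's entry block, i.e.
    // every path from the entry to a normal function exit passes through BB.
    static bool executesOnEveryNormalExit(const BasicBlock &BB,
                                          const PostDominatorTree &PDT) {
      const BasicBlock &Entry = BB.getParent()->getEntryBlock();
      return PDT.dominates(&BB, &Entry);
    }

Note the per-instruction check that remains: even inside such a block, the walk stops at the first instruction that is not guaranteed to transfer execution to its successor (for example a call that may throw), since the interesting call site must actually be reached.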
@@ -632,7 +638,7 @@
     if (!F->hasExactDefinition())
       continue;
 
-    Changed |= addArgumentAttrsFromCallsites(*F);
+    Changed |= addArgumentAttrsFromCallsites(*F, PDT);
 
     // Functions that are readonly (or readnone) and nounwind and don't return
     // a value can't capture arguments. Don't analyze them.
@@ -892,8 +898,7 @@
   bool MadeChange = false;
   for (Function *F : SCCNodes) {
-    if (F->returnDoesNotAlias() ||
-        !F->getReturnType()->isPointerTy())
+    if (F->returnDoesNotAlias() || !F->getReturnType()->isPointerTy())
       continue;
 
     F->setReturnDoesNotAlias();
@@ -1046,14 +1051,16 @@
   // * we can remove its convergent attribute.
   bool HasConvergentFn = false;
   for (Function *F : SCCNodes) {
-    if (!F->isConvergent()) continue;
+    if (!F->isConvergent())
+      continue;
     HasConvergentFn = true;
 
     // Can't remove convergent from function declarations.
-    if (F->isDeclaration()) return false;
+    if (F->isDeclaration())
+      return false;
 
-    // Can't remove convergent if any of our functions has a convergent call to a
-    // function not in the SCC.
+    // Can't remove convergent if any of our functions has a convergent call to
+    // a function not in the SCC.
     for (Instruction &I : instructions(*F)) {
       CallSite CS(&I);
       // Bail if CS is a convergent call to a function not in the SCC.
@@ -1064,14 +1071,16 @@
   }
 
   // If the SCC doesn't have any convergent functions, we have nothing to do.
-  if (!HasConvergentFn) return false;
+  if (!HasConvergentFn)
+    return false;
 
   // If we got here, all of the calls the SCC makes to functions not in the SCC
   // are non-convergent. Therefore all of the SCC's functions can also be made
   // non-convergent. We'll remove the attr from the callsites in
   // InstCombineCalls.
   for (Function *F : SCCNodes) {
-    if (!F->isConvergent()) continue;
+    if (!F->isConvergent())
+      continue;
 
     DEBUG(dbgs() << "Removing convergent attr from fn " << F->getName()
                  << "\n");
@@ -1119,10 +1128,12 @@
 PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
                                                   CGSCCAnalysisManager &AM,
                                                   LazyCallGraph &CG,
-                                                  CGSCCUpdateResult &) {
+                                                  CGSCCUpdateResult &UR) {
   FunctionAnalysisManager &FAM =
       AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
 
+  auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(C, CG);
+
   // We pass a lambda into functions to wire them up to the analysis manager
   // for getting function analyses.
   auto AARGetter = [&](Function &F) -> AAResults & {
@@ -1160,7 +1171,7 @@
   bool Changed = false;
   Changed |= addArgumentReturnedAttrs(SCCNodes);
   Changed |= addReadAttrs(SCCNodes, AARGetter);
-  Changed |= addArgumentAttrs(SCCNodes);
+  Changed |= addArgumentAttrs(SCCNodes, PDT);
 
   // If we have no external nodes participating in the SCC, we can deduce some
   // more precise attributes as well.
@@ -1190,6 +1201,7 @@
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
     AU.addRequired<AssumptionCacheTracker>();
+    AU.addRequired<PostDominatorTreeWrapperPass>();
     getAAResultsAnalysisUsage(AU);
     CallGraphSCCPass::getAnalysisUsage(AU);
   }
@@ -1210,7 +1222,8 @@
 }
 
 template <typename AARGetterT>
-static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
+static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter,
+                    PostDominatorTree &PDT) {
   bool Changed = false;
 
   // Fill SCCNodes with the elements of the SCC. Used for quickly looking up
@@ -1237,7 +1250,7 @@
   Changed |= addArgumentReturnedAttrs(SCCNodes);
   Changed |= addReadAttrs(SCCNodes, AARGetter);
-  Changed |= addArgumentAttrs(SCCNodes);
+  Changed |= addArgumentAttrs(SCCNodes, PDT);
 
   // If we have no external nodes participating in the SCC, we can deduce some
   // more precise attributes as well.
@@ -1254,7 +1267,9 @@
 bool PostOrderFunctionAttrsLegacyPass::runOnSCC(CallGraphSCC &SCC) {
   if (skipSCC(SCC))
     return false;
-  return runImpl(SCC, LegacyAARGetter(*this));
+
+  auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+  return runImpl(SCC, LegacyAARGetter(*this), PDT);
 }
 
 namespace {
@@ -1272,6 +1287,7 @@
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesCFG();
+    AU.addRequired<PostDominatorTreeWrapperPass>();
     AU.addRequired<CallGraphWrapperPass>();
     AU.addPreserved<CallGraphWrapperPass>();
   }
@@ -1281,11 +1297,13 @@
 
 char ReversePostOrderFunctionAttrsLegacyPass::ID = 0;
 
-INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrsLegacyPass, "rpo-functionattrs",
-                      "Deduce function attributes in RPO", false, false)
+INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrsLegacyPass,
+                      "rpo-functionattrs", "Deduce function attributes in RPO",
+                      false, false)
 INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_END(ReversePostOrderFunctionAttrsLegacyPass, "rpo-functionattrs",
-                    "Deduce function attributes in RPO", false, false)
+INITIALIZE_PASS_END(ReversePostOrderFunctionAttrsLegacyPass,
+                    "rpo-functionattrs", "Deduce function attributes in RPO",
+                    false, false)
 
 Pass *llvm::createReversePostOrderFunctionAttrsPass() {
   return new ReversePostOrderFunctionAttrsLegacyPass();
 }
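A note on the new-pass-manager hookup in run() above: PostDominatorTreeAnalysis is registered as a function analysis, so inside a CGSCC pass it is normally reached through the FunctionAnalysisManagerCGSCCProxy and queried per function. The following is only a sketch of that alternative wiring, not what this diff does:

    // Sketch: reuse the proxy the pass already obtains for alias analysis and
    // hand out a per-function PostDominatorTree.
    FunctionAnalysisManager &FAM =
        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
    auto GetPDT = [&](Function &F) -> PostDominatorTree & {
      return FAM.getResult<PostDominatorTreeAnalysis>(F);
    };
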
Index: lib/Transforms/IPO/PassManagerBuilder.cpp
===================================================================
--- lib/Transforms/IPO/PassManagerBuilder.cpp
+++ lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/ScopedNoAliasAA.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
@@ -50,30 +51,27 @@
                      cl::desc("Run the Loop vectorization passes"));
 
 static cl::opt<bool>
-RunSLPVectorization("vectorize-slp", cl::Hidden,
-                    cl::desc("Run the SLP vectorization passes"));
+    RunSLPVectorization("vectorize-slp", cl::Hidden,
+                        cl::desc("Run the SLP vectorization passes"));
 
-static cl::opt<bool>
-UseGVNAfterVectorization("use-gvn-after-vectorization",
-    cl::init(false), cl::Hidden,
-    cl::desc("Run GVN instead of Early CSE after vectorization passes"));
+static cl::opt<bool> UseGVNAfterVectorization(
+    "use-gvn-after-vectorization", cl::init(false), cl::Hidden,
+    cl::desc("Run GVN instead of Early CSE after vectorization passes"));
 
 static cl::opt<bool> ExtraVectorizerPasses(
     "extra-vectorizer-passes", cl::init(false), cl::Hidden,
     cl::desc("Run cleanup optimization passes after vectorization."));
 
-static cl::opt<bool>
-RunLoopRerolling("reroll-loops", cl::Hidden,
-                 cl::desc("Run the loop rerolling pass"));
+static cl::opt<bool> RunLoopRerolling("reroll-loops", cl::Hidden,
+                                      cl::desc("Run the loop rerolling pass"));
 
 static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
                                cl::desc("Run the NewGVN pass"));
 
-static cl::opt<bool>
-RunSLPAfterLoopVectorization("run-slp-after-loop-vectorization",
-    cl::init(true), cl::Hidden,
-    cl::desc("Run the SLP vectorizer (and BB vectorizer) after the Loop "
-             "vectorizer instead of before"));
+static cl::opt<bool> RunSLPAfterLoopVectorization(
+    "run-slp-after-loop-vectorization", cl::init(true), cl::Hidden,
+    cl::desc("Run the SLP vectorizer (and BB vectorizer) after the Loop "
+             "vectorizer instead of before"));
 
 // Experimental option to use CFL-AA
 enum class CFLAAType { None, Steensgaard, Andersen, Both };
@@ -96,13 +94,13 @@
 EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden,
                         cl::desc("Enable preparation for ThinLTO."));
 
-static
cl::opt RunPGOInstrGen( - "profile-generate", cl::init(false), cl::Hidden, - cl::desc("Enable PGO instrumentation.")); +static cl::opt RunPGOInstrGen("profile-generate", cl::init(false), + cl::Hidden, + cl::desc("Enable PGO instrumentation.")); static cl::opt PGOOutputFile("profile-generate-file", cl::init(""), cl::Hidden, - cl::desc("Specify the path of profile data file.")); + cl::desc("Specify the path of profile data file.")); static cl::opt RunPGOInstrUse( "profile-use", cl::init(""), cl::Hidden, cl::value_desc("filename"), @@ -126,9 +124,9 @@ "enable-earlycse-memssa", cl::init(true), cl::Hidden, cl::desc("Enable the EarlyCSE w/ MemorySSA pass (default = on)")); -static cl::opt EnableGVNHoist( - "enable-gvn-hoist", cl::init(false), cl::Hidden, - cl::desc("Enable the GVN hoisting pass (default = off)")); +static cl::opt + EnableGVNHoist("enable-gvn-hoist", cl::init(false), cl::Hidden, + cl::desc("Enable the GVN hoisting pass (default = off)")); static cl::opt DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false), @@ -140,31 +138,31 @@ cl::Hidden, cl::desc("Enable the simple loop unswitch pass.")); -static cl::opt EnableGVNSink( - "enable-gvn-sink", cl::init(false), cl::Hidden, - cl::desc("Enable the GVN sinking pass (default = off)")); +static cl::opt + EnableGVNSink("enable-gvn-sink", cl::init(false), cl::Hidden, + cl::desc("Enable the GVN sinking pass (default = off)")); PassManagerBuilder::PassManagerBuilder() { - OptLevel = 2; - SizeLevel = 0; - LibraryInfo = nullptr; - Inliner = nullptr; - DisableUnrollLoops = false; - SLPVectorize = RunSLPVectorization; - LoopVectorize = RunLoopVectorization; - RerollLoops = RunLoopRerolling; - NewGVN = RunNewGVN; - DisableGVNLoadPRE = false; - VerifyInput = false; - VerifyOutput = false; - MergeFunctions = false; - PrepareForLTO = false; - EnablePGOInstrGen = RunPGOInstrGen; - PGOInstrGen = PGOOutputFile; - PGOInstrUse = RunPGOInstrUse; - PrepareForThinLTO = EnablePrepareForThinLTO; - PerformThinLTO = false; - DivergentTarget = false; + OptLevel = 2; + SizeLevel = 0; + LibraryInfo = nullptr; + Inliner = nullptr; + DisableUnrollLoops = false; + SLPVectorize = RunSLPVectorization; + LoopVectorize = RunLoopVectorization; + RerollLoops = RunLoopRerolling; + NewGVN = RunNewGVN; + DisableGVNLoadPRE = false; + VerifyInput = false; + VerifyOutput = false; + MergeFunctions = false; + PrepareForLTO = false; + EnablePGOInstrGen = RunPGOInstrGen; + PGOInstrGen = PGOOutputFile; + PGOInstrUse = RunPGOInstrUse; + PrepareForThinLTO = EnablePrepareForThinLTO; + PerformThinLTO = false; + DivergentTarget = false; } PassManagerBuilder::~PassManagerBuilder() { @@ -174,7 +172,9 @@ /// Set of global extensions, automatically added as part of the standard set. static ManagedStatic, 8> > GlobalExtensions; + PassManagerBuilder::ExtensionFn>, + 8>> + GlobalExtensions; /// Check if GlobalExtensions is constructed and not empty. /// Since GlobalExtensions is a managed static, calling 'empty()' will trigger @@ -245,7 +245,8 @@ if (LibraryInfo) FPM.add(new TargetLibraryInfoWrapperPass(*LibraryInfo)); - if (OptLevel == 0) return; + if (OptLevel == 0) + return; addInitialAliasAnalysisPasses(FPM); @@ -304,7 +305,8 @@ // Start of function pass. // Break up aggregate allocas, using SSAUpdater. 
MPM.add(createSROAPass()); - MPM.add(createEarlyCSEPass(EnableEarlyCSEMemSSA)); // Catch trivial redundancies + MPM.add( + createEarlyCSEPass(EnableEarlyCSEMemSSA)); // Catch trivial redundancies if (EnableGVNHoist) MPM.add(createGVNHoistPass()); if (EnableGVNSink) { @@ -314,9 +316,9 @@ // Speculative execution if the target has divergent branches; otherwise nop. MPM.add(createSpeculativeExecutionIfHasBranchDivergencePass()); - MPM.add(createJumpThreadingPass()); // Thread jumps. + MPM.add(createJumpThreadingPass()); // Thread jumps. MPM.add(createCorrelatedValuePropagationPass()); // Propagate conditionals - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs // Combine silly seq's addInstructionCombiningPass(MPM); if (SizeLevel == 0 && !DisableLibCallsShrinkWrap) @@ -328,28 +330,28 @@ MPM.add(createPGOMemOPSizeOptLegacyPass()); MPM.add(createTailCallEliminationPass()); // Eliminate tail calls - MPM.add(createCFGSimplificationPass()); // Merge & remove BBs - MPM.add(createReassociatePass()); // Reassociate expressions + MPM.add(createCFGSimplificationPass()); // Merge & remove BBs + MPM.add(createReassociatePass()); // Reassociate expressions // Rotate Loop - disable header duplication at -Oz MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1)); - MPM.add(createLICMPass()); // Hoist loop invariants + MPM.add(createLICMPass()); // Hoist loop invariants if (EnableSimpleLoopUnswitch) MPM.add(createSimpleLoopUnswitchLegacyPass()); else MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); MPM.add(createCFGSimplificationPass()); addInstructionCombiningPass(MPM); - MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars - MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. + MPM.add(createIndVarSimplifyPass()); // Canonicalize indvars + MPM.add(createLoopIdiomPass()); // Recognize idioms like memset. addExtensionsToPM(EP_LateLoopOptimizations, MPM); - MPM.add(createLoopDeletionPass()); // Delete dead loops + MPM.add(createLoopDeletionPass()); // Delete dead loops if (EnableLoopInterchange) { MPM.add(createLoopInterchangePass()); // Interchange loops MPM.add(createCFGSimplificationPass()); } if (!DisableUnrollLoops) - MPM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops + MPM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops addExtensionsToPM(EP_LoopOptimizerEnd, MPM); if (OptLevel > 1) { @@ -357,21 +359,21 @@ MPM.add(NewGVN ? createNewGVNPass() : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies } - MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset - MPM.add(createSCCPPass()); // Constant prop with SCCP + MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset + MPM.add(createSCCPPass()); // Constant prop with SCCP // Delete dead bit computations (instcombine runs after to fold away the dead // computations, and then ADCE will run later to exploit any new DCE // opportunities that creates). - MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations + MPM.add(createBitTrackingDCEPass()); // Delete dead bit computations // Run instcombine after redundancy elimination to exploit opportunities // opened up by them. 
addInstructionCombiningPass(MPM); addExtensionsToPM(EP_Peephole, MPM); - MPM.add(createJumpThreadingPass()); // Thread jumps + MPM.add(createJumpThreadingPass()); // Thread jumps MPM.add(createCorrelatedValuePropagationPass()); - MPM.add(createDeadStoreEliminationPass()); // Delete dead stores + MPM.add(createDeadStoreEliminationPass()); // Delete dead stores MPM.add(createLICMPass()); addExtensionsToPM(EP_ScalarOptimizerLate, MPM); @@ -381,7 +383,7 @@ if (!RunSLPAfterLoopVectorization && SLPVectorize) MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. - MPM.add(createAggressiveDCEPass()); // Delete dead instructions + MPM.add(createAggressiveDCEPass()); // Delete dead instructions MPM.add(createCFGSimplificationPass()); // Merge & remove BBs // Clean up after everything. addInstructionCombiningPass(MPM); @@ -395,6 +397,9 @@ MPM.add(createSampleProfileLoaderPass(PGOSampleUse)); } + // Required for function attribute inference + MPM.add(createPostDomTree()); + // Allow forcing function attributes as a debugging and tuning aid. MPM.add(createForceFunctionAttrsLegacyPass()); @@ -461,6 +466,9 @@ if (PrepareForThinLTOUsingPGOSampleProfile) DisableUnrollLoops = true; + // Required for function attribute inference + MPM.add(createPostDomTree()); + // Infer attributes about declarations if possible. MPM.add(createInferFunctionAttrsLegacyPass()); @@ -469,7 +477,7 @@ if (OptLevel > 2) MPM.add(createCallSiteSplittingPass()); - MPM.add(createIPSCCPPass()); // IP SCCP + MPM.add(createIPSCCPPass()); // IP SCCP MPM.add(createCalledValuePropagationPass()); MPM.add(createGlobalOptimizerPass()); // Optimize out global vars // Promote any localized global vars. @@ -566,8 +574,8 @@ // size. By placing it just after inlining other optimizations which runs // later might get benefit of no-alias assumption in clone loop. if (UseLoopVersioningLICM) { - MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM - MPM.add(createLICMPass()); // Hoist loop invariants + MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM + MPM.add(createLICMPass()); // Hoist loop invariants } // We add a fresh GlobalsModRef run at this point. This is particularly @@ -648,7 +656,7 @@ addInstructionCombiningPass(MPM); if (!DisableUnrollLoops) { - MPM.add(createLoopUnrollPass(OptLevel)); // Unroll small loops + MPM.add(createLoopUnrollPass(OptLevel)); // Unroll small loops // LoopUnroll may generate some redundency to cleanup. addInstructionCombiningPass(MPM); @@ -658,7 +666,7 @@ // outer loop. LICM pass can help to promote the runtime check out if the // checked value is loop invariant. MPM.add(createLICMPass()); - } + } // After vectorization and unrolling, assume intrinsics may tell us more // about pointer alignments. @@ -670,8 +678,8 @@ // GlobalOpt already deletes dead functions and globals, at -O2 try a // late pass of GlobalDCE. It is capable of deleting dead cycles. if (OptLevel > 1) { - MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. - MPM.add(createConstantMergePass()); // Merge dup global constants + MPM.add(createGlobalDCEPass()); // Remove dead fns and globals. + MPM.add(createConstantMergePass()); // Merge dup global constants } if (MergeFunctions) @@ -705,6 +713,9 @@ // Provide AliasAnalysis services for optimizations. addInitialAliasAnalysisPasses(PM); + // Required for function attribute inference + PM.add(createPostDomTree()); + // Allow forcing function attributes as a debugging and tuning aid. 
PM.add(createForceFunctionAttrsLegacyPass()); @@ -775,7 +786,7 @@ Inliner = nullptr; } - PM.add(createPruneEHPass()); // Remove dead EH info. + PM.add(createPruneEHPass()); // Remove dead EH info. // Optimize globals again if we ran the inliner. if (RunInliner) @@ -796,13 +807,13 @@ // Run a few AA driven optimizations here and now, to cleanup the code. PM.add(createPostOrderFunctionAttrsLegacyPass()); // Add nocapture. - PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. + PM.add(createGlobalsAAWrapperPass()); // IP alias analysis. - PM.add(createLICMPass()); // Hoist loop invariants. + PM.add(createLICMPass()); // Hoist loop invariants. PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds. PM.add(NewGVN ? createNewGVNPass() : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies. - PM.add(createMemCpyOptPass()); // Remove dead memcpys. + PM.add(createMemCpyOptPass()); // Remove dead memcpys. // Nuke dead stores. PM.add(createDeadStoreEliminationPass()); @@ -814,7 +825,7 @@ PM.add(createLoopInterchangePass()); if (!DisableUnrollLoops) - PM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops + PM.add(createSimpleLoopUnrollPass(OptLevel)); // Unroll small loops PM.add(createLoopVectorizePass(true, LoopVectorize)); // The vectorizer may have significantly shortened a loop body; unroll again. if (!DisableUnrollLoops) @@ -823,10 +834,10 @@ // Now that we've optimized loops (in particular loop induction variables), // we may have exposed more scalar opportunities. Run parts of the scalar // optimizer again at this point. - addInstructionCombiningPass(PM); // Initial cleanup + addInstructionCombiningPass(PM); // Initial cleanup PM.add(createCFGSimplificationPass()); // if-convert - PM.add(createSCCPPass()); // Propagate exposed constants - addInstructionCombiningPass(PM); // Clean up again + PM.add(createSCCPPass()); // Propagate exposed constants + addInstructionCombiningPass(PM); // Clean up again PM.add(createBitTrackingDCEPass()); // More scalar chains could be vectorized due to more alias information @@ -926,7 +937,7 @@ } inline PassManagerBuilder *unwrap(LLVMPassManagerBuilderRef P) { - return reinterpret_cast(P); + return reinterpret_cast(P); } inline LLVMPassManagerBuilderRef wrap(PassManagerBuilder *P) { @@ -943,57 +954,49 @@ delete Builder; } -void -LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB, - unsigned OptLevel) { +void LLVMPassManagerBuilderSetOptLevel(LLVMPassManagerBuilderRef PMB, + unsigned OptLevel) { PassManagerBuilder *Builder = unwrap(PMB); Builder->OptLevel = OptLevel; } -void -LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB, - unsigned SizeLevel) { +void LLVMPassManagerBuilderSetSizeLevel(LLVMPassManagerBuilderRef PMB, + unsigned SizeLevel) { PassManagerBuilder *Builder = unwrap(PMB); Builder->SizeLevel = SizeLevel; } -void -LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB, - LLVMBool Value) { +void LLVMPassManagerBuilderSetDisableUnitAtATime(LLVMPassManagerBuilderRef PMB, + LLVMBool Value) { // NOTE: The DisableUnitAtATime switch has been removed. 
}
 
-void
-LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB,
-                                            LLVMBool Value) {
+void LLVMPassManagerBuilderSetDisableUnrollLoops(LLVMPassManagerBuilderRef PMB,
+                                                 LLVMBool Value) {
   PassManagerBuilder *Builder = unwrap(PMB);
   Builder->DisableUnrollLoops = Value;
 }
 
-void
-LLVMPassManagerBuilderSetDisableSimplifyLibCalls(LLVMPassManagerBuilderRef PMB,
-                                                 LLVMBool Value) {
+void LLVMPassManagerBuilderSetDisableSimplifyLibCalls(
+    LLVMPassManagerBuilderRef PMB, LLVMBool Value) {
   // NOTE: The simplify-libcalls pass has been removed.
 }
 
-void
-LLVMPassManagerBuilderUseInlinerWithThreshold(LLVMPassManagerBuilderRef PMB,
-                                              unsigned Threshold) {
+void LLVMPassManagerBuilderUseInlinerWithThreshold(
+    LLVMPassManagerBuilderRef PMB, unsigned Threshold) {
   PassManagerBuilder *Builder = unwrap(PMB);
   Builder->Inliner = createFunctionInliningPass(Threshold);
 }
 
-void
-LLVMPassManagerBuilderPopulateFunctionPassManager(LLVMPassManagerBuilderRef PMB,
-                                                  LLVMPassManagerRef PM) {
+void LLVMPassManagerBuilderPopulateFunctionPassManager(
+    LLVMPassManagerBuilderRef PMB, LLVMPassManagerRef PM) {
   PassManagerBuilder *Builder = unwrap(PMB);
   legacy::FunctionPassManager *FPM = unwrap(PM);
   Builder->populateFunctionPassManager(*FPM);
 }
 
-void
-LLVMPassManagerBuilderPopulateModulePassManager(LLVMPassManagerBuilderRef PMB,
-                                                LLVMPassManagerRef PM) {
+void LLVMPassManagerBuilderPopulateModulePassManager(
+    LLVMPassManagerBuilderRef PMB, LLVMPassManagerRef PM) {
   PassManagerBuilder *Builder = unwrap(PMB);
   legacy::PassManagerBase *MPM = unwrap(PM);
   Builder->populateModulePassManager(*MPM);
Index: lib/Transforms/IPO/WholeProgramDevirt.cpp
===================================================================
--- lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -82,10 +82,11 @@
 #include "llvm/Transforms/Utils/Evaluator.h"
 #include <algorithm>
 #include <cstddef>
+#include <iostream>
 #include <map>
 #include <set>
 
 using namespace llvm;
 using namespace wholeprogramdevirt;
 
@@ -224,7 +226,8 @@
 
 VirtualCallTarget::VirtualCallTarget(Function *Fn, const TypeMemberInfo *TM)
     : Fn(Fn), TM(TM),
-      IsBigEndian(Fn->getParent()->getDataLayout().isBigEndian()), WasDevirt(false) {}
+      IsBigEndian(Fn->getParent()->getDataLayout().isBigEndian()),
+      WasDevirt(false) {}
 
 namespace {
 
@@ -253,8 +256,7 @@
     return DenseMapInfo<Metadata *>::getHashValue(I.TypeID) ^
           DenseMapInfo<uint64_t>::getHashValue(I.ByteOffset);
   }
-  static bool isEqual(const VTableSlot &LHS,
-                      const VTableSlot &RHS) {
+  static bool isEqual(const VTableSlot &LHS, const VTableSlot &RHS) {
     return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
   }
 };
@@ -741,8 +743,8 @@
 }
 
 bool DevirtModule::trySingleImplDevirt(
-    MutableArrayRef<VirtualCallTarget> TargetsForSlot,
-    VTableSlotInfo &SlotInfo, WholeProgramDevirtResolution *Res) {
+    MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
+    WholeProgramDevirtResolution *Res) {
   // See if the program contains a single implementation of this virtual
   // function.
   Function *TheFn = TargetsForSlot[0].Fn;
@@ -1099,7 +1101,6 @@
   for (auto &&Target : TargetsForSlot)
     Target.WasDevirt = true;
 
-
   if (CSByConstantArg.second.isExported()) {
     ResByArg->TheKind = WholeProgramDevirtResolution::ByArg::VirtualConstProp;
     exportConstant(Slot, CSByConstantArg.first, "byte", OffsetByte,
@@ -1311,8 +1312,8 @@
   if (Res.TheKind == WholeProgramDevirtResolution::SingleImpl) {
     // The type of the function in the declaration is irrelevant because every
     // call site will cast it to the correct type.
-    auto *SingleImpl = M.getOrInsertFunction(
-        Res.SingleImplName, Type::getVoidTy(M.getContext()));
+    auto *SingleImpl = M.getOrInsertFunction(Res.SingleImplName,
+                                             Type::getVoidTy(M.getContext()));
 
     // This is the import phase so we should not be exporting anything.
     bool IsExported = false;
@@ -1365,6 +1366,7 @@
 }
 
 bool DevirtModule::run() {
+  std::cout << "Devirtualization pass.\n";
   Function *TypeTestFunc =
       M.getFunction(Intrinsic::getName(Intrinsic::type_test));
   Function *TypeCheckedLoadFunc =
@@ -1453,7 +1455,7 @@
   // For each (type, offset) pair:
   bool DidVirtualConstProp = false;
-  std::map<std::string, Function*> DevirtTargets;
+  std::map<std::string, Function *> DevirtTargets;
   for (auto &S : CallSlots) {
     // Search each of the members of the type identifier for the virtual
     // function implementation at offset S.first.ByteOffset, and add to
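
For readers unfamiliar with the pass touched above: trySingleImplDevirt fires when whole-program analysis proves a virtual function has exactly one implementation. A hypothetical source-level example of the pattern it targets, assuming the module is compiled with whole-program vtable visibility (e.g. LTO with -fwhole-program-vtables):

    // Hypothetical input: Base::f has a single implementation in the program.
    struct Base {
      virtual int f() = 0;
      virtual ~Base() = default;
    };
    struct Derived final : Base {
      int f() override { return 42; }
    };
    int callThroughVTable(Base *B) {
      return B->f(); // indirect call through the vtable; replaced by a direct
                     // call to Derived::f once the type hierarchy is known
    }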