Index: llvm/trunk/include/llvm/Analysis/CGSCCPassManager.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/CGSCCPassManager.h
+++ llvm/trunk/include/llvm/Analysis/CGSCCPassManager.h
@@ -11,49 +11,228 @@
 /// This header provides classes for managing passes over SCCs of the call
 /// graph. These passes form an important component of LLVM's interprocedural
 /// optimizations. Because they operate on the SCCs of the call graph, and they
-/// traverse the graph in post order, they can effectively do pair-wise
-/// interprocedural optimizations for all call edges in the program. At each
-/// call site edge, the callee has already been optimized as much as is
-/// possible. This in turn allows very accurate analysis of it for IPO.
+/// traverse the graph in post-order, they can effectively do pair-wise
+/// interprocedural optimizations for all call edges in the program while
+/// incrementally refining it and improving the context of these pair-wise
+/// optimizations. At each call site edge, the callee has already been
+/// optimized as much as is possible. This in turn allows very accurate
+/// analysis of it for IPO.
+///
+/// A secondary, more general goal is to be able to isolate optimization on
+/// unrelated parts of the IR module. This is useful to ensure our
+/// optimizations are principled and don't miss opportunities where refinement
+/// of one part of the module influences transformations in another part of
+/// the module. But this is also useful if we want to parallelize the
+/// optimizations across common large module graph shapes which tend to be
+/// very wide and have large regions of unrelated cliques.
+///
+/// To satisfy these goals, we use the LazyCallGraph which provides two graphs
+/// nested inside each other (and built lazily from the bottom-up): the call
+/// graph proper, and a reference graph. The reference graph is a superset of
+/// the call graph and is a conservative approximation of what could through
+/// scalar or CGSCC transforms *become* the call graph. Using this allows us to
+/// ensure we optimize functions prior to them being introduced into the call
+/// graph by devirtualization or other techniques, and thus ensures that
+/// subsequent pair-wise interprocedural optimizations observe the optimized
+/// form of these functions. The (potentially transitive) reference
+/// reachability used by the reference graph is a conservative approximation
+/// that still allows us to have independent regions of the graph.
+///
+/// FIXME: There is one major drawback of the reference graph: in its naive
+/// form it is quadratic because it contains a distinct edge for each
+/// (potentially indirect) reference, even if they are all through some common
+/// global table of function pointers. This can be fixed in a number of ways
+/// that essentially preserve enough of the normalization. While it isn't
+/// expected to completely preclude the usability of this, it will need to be
+/// addressed.
+///
+/// All of these issues are made substantially more complex in the face of
+/// mutations to the call graph while optimization passes are being run. When
+/// mutations to the call graph occur we want to achieve two different things:
+///
+/// - We need to update the call graph in-flight and invalidate analyses
+///   cached on entities in the graph.
+///   Because of the cache-based analysis design of the pass manager, it is
+///   essential to have stable identities for the elements of the IR that
+///   passes traverse, and to invalidate any analyses cached on these elements
+///   as the mutations take place.
+///
+/// - We want to preserve the incremental and post-order traversal of the
+///   graph even as it is refined and mutated. This means we want optimization
+///   to observe the most refined form of the call graph and to do so in
+///   post-order.
+///
+/// To address this, the CGSCC manager uses both worklists that can be expanded
+/// by passes which transform the IR, and provides invalidation tests to skip
+/// entries that become dead. This extra data is provided to every SCC pass so
+/// that it can carefully update the manager's traversal as the call graph
+/// mutates.
+///
+/// We also provide support for running function passes within the CGSCC walk,
+/// and there we provide automatic update of the call graph, and of the pass
+/// manager, to reflect call graph changes that fall out naturally as part of
+/// scalar transformations.
+///
+/// The patterns used to ensure the goals of post-order visitation of the fully
+/// refined graph:
+///
+/// 1) Sink toward the "bottom" as the graph is refined. This means that any
+///    iteration continues in some valid post-order sequence after the mutation
+///    has altered the structure.
+///
+/// 2) Enqueue in post-order, including the current entity. If the current
+///    entity's shape changes, it and everything after it in post-order needs
+///    to be visited to observe that shape.
 ///
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_ANALYSIS_CGSCCPASSMANAGER_H
 #define LLVM_ANALYSIS_CGSCCPASSMANAGER_H
 
+#include "llvm/ADT/PriorityWorklist.h"
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/IR/PassManager.h"
 
 namespace llvm {
 
-extern template class PassManager<LazyCallGraph::SCC>;
-/// \brief The CGSCC pass manager.
-///
-/// See the documentation for the PassManager template for details. It runs
-/// a sequency of SCC passes over each SCC that the manager is run over. This
-/// typedef serves as a convenient way to refer to this construct.
-typedef PassManager<LazyCallGraph::SCC> CGSCCPassManager;
+struct CGSCCUpdateResult;
 
-extern template class AnalysisManager<LazyCallGraph::SCC>;
+extern template class AnalysisManager<LazyCallGraph::SCC, LazyCallGraph &>;
 /// \brief The CGSCC analysis manager.
 ///
 /// See the documentation for the AnalysisManager template for detailed
 /// documentation. This typedef serves as a convenient way to refer to this
 /// construct in the adaptors and proxies used to integrate this into the
 /// larger pass manager infrastructure.
-typedef AnalysisManager<LazyCallGraph::SCC> CGSCCAnalysisManager;
+typedef AnalysisManager<LazyCallGraph::SCC, LazyCallGraph &>
+    CGSCCAnalysisManager;
+
+// Explicit specialization and instantiation declarations for the pass manager.
+// See the comments on the definition of the specialization for details on how
+// it differs from the primary template.
+template <>
+PreservedAnalyses
+PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
+            CGSCCUpdateResult &>::run(LazyCallGraph::SCC &InitialC,
+                                      CGSCCAnalysisManager &AM,
+                                      LazyCallGraph &G, CGSCCUpdateResult &UR);
+extern template class PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager,
+                                  LazyCallGraph &, CGSCCUpdateResult &>;
+
+/// \brief The CGSCC pass manager.
+///
+/// See the documentation for the PassManager template for details. It runs
+/// a sequence of SCC passes over each SCC that the manager is run over. This
+/// typedef serves as a convenient way to refer to this construct.
+typedef PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
+                    CGSCCUpdateResult &>
+    CGSCCPassManager;
+
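For orientation, an editor's sketch (not part of the patch): with the typedefs above, the minimal shape of a CGSCC pass under the new interface is a struct whose run method receives the SCC, the analysis manager, the underlying LazyCallGraph, and the update-result struct. The pass name here is hypothetical; a pass that never mutates the call graph can simply ignore the last two parameters.

    struct HypotheticalSCCPass : PassInfoMixin<HypotheticalSCCPass> {
      // Hypothetical no-op pass; the signature matches the CGSCCPassManager
      // typedef above.
      PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
                            LazyCallGraph &CG, CGSCCUpdateResult &UR) {
        return PreservedAnalyses::all();
      }
    };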
+/// An explicit specialization of the require analysis template pass.
+template <typename AnalysisT>
+struct RequireAnalysisPass<AnalysisT, LazyCallGraph::SCC, CGSCCAnalysisManager,
+                           LazyCallGraph &, CGSCCUpdateResult &>
+    : PassInfoMixin<RequireAnalysisPass<AnalysisT, LazyCallGraph::SCC,
+                                        CGSCCAnalysisManager, LazyCallGraph &,
+                                        CGSCCUpdateResult &>> {
+  PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
+                        LazyCallGraph &CG, CGSCCUpdateResult &) {
+    (void)AM.template getResult<AnalysisT>(C, CG);
+    return PreservedAnalyses::all();
+  }
+};
 
 extern template class InnerAnalysisManagerProxy<CGSCCAnalysisManager, Module>;
 /// A proxy from a \c CGSCCAnalysisManager to a \c Module.
 typedef InnerAnalysisManagerProxy<CGSCCAnalysisManager, Module>
     CGSCCAnalysisManagerModuleProxy;
 
-extern template class OuterAnalysisManagerProxy<ModuleAnalysisManager,
-                                                LazyCallGraph::SCC>;
+extern template class OuterAnalysisManagerProxy<
+    ModuleAnalysisManager, LazyCallGraph::SCC, LazyCallGraph &>;
 /// A proxy from a \c ModuleAnalysisManager to an \c SCC.
-typedef OuterAnalysisManagerProxy<ModuleAnalysisManager, LazyCallGraph::SCC>
+typedef OuterAnalysisManagerProxy<ModuleAnalysisManager, LazyCallGraph::SCC,
+                                  LazyCallGraph &>
     ModuleAnalysisManagerCGSCCProxy;
 
+/// Support structure for SCC passes to communicate updates to the call graph
+/// back to the CGSCC pass manager infrastructure.
+///
+/// The CGSCC pass manager runs SCC passes which are allowed to update the call
+/// graph and SCC structures. This means the structure the pass manager works
+/// on is mutating underneath it. In order to support that, there needs to be
+/// careful communication about the precise nature and ramifications of these
+/// updates to the pass management infrastructure.
+///
+/// All SCC passes will have to accept a reference to the management layer's
+/// update result struct and use it to reflect the results of any CG updates
+/// performed.
+///
+/// Passes which do not change the call graph structure in any way can just
+/// ignore this argument to their run method.
+struct CGSCCUpdateResult {
+  /// Worklist of the RefSCCs queued for processing.
+  ///
+  /// When a pass refines the graph and creates new RefSCCs or causes them to
+  /// have a different shape or set of component SCCs it should add the RefSCCs
+  /// to this worklist so that we visit them in the refined form.
+  ///
+  /// This worklist is in reverse post-order, as we pop off the back in order
+  /// to observe RefSCCs in post-order. When adding RefSCCs, clients should add
+  /// them in reverse post-order.
+  SmallPriorityWorklist<LazyCallGraph::RefSCC *, 1> &RCWorklist;
+
+  /// Worklist of the SCCs queued for processing.
+  ///
+  /// When a pass refines the graph and creates new SCCs or causes them to have
+  /// a different shape or set of component functions it should add the SCCs to
+  /// this worklist so that we visit them in the refined form.
+  ///
+  /// Note that if the SCCs are part of a RefSCC that is added to the \c
+  /// RCWorklist, they don't need to be added here as visiting the RefSCC will
+  /// be sufficient to re-visit the SCCs within it.
+  ///
+  /// This worklist is in reverse post-order, as we pop off the back in order
+  /// to observe SCCs in post-order. When adding SCCs, clients should add them
+  /// in reverse post-order.
+  SmallPriorityWorklist<LazyCallGraph::SCC *, 1> &CWorklist;
+
+  /// The set of invalidated RefSCCs which should be skipped if they are found
+  /// in \c RCWorklist.
+  ///
+  /// This is used to quickly prune out RefSCCs when they get deleted and
+  /// happen to already be on the worklist. We use this primarily to avoid
+  /// scanning the list and removing entries from it.
+  SmallPtrSetImpl<LazyCallGraph::RefSCC *> &InvalidatedRefSCCs;
+
+  /// The set of invalidated SCCs which should be skipped if they are found
+  /// in \c CWorklist.
+  ///
+  /// This is used to quickly prune out SCCs when they get deleted and happen
+  /// to already be on the worklist. We use this primarily to avoid scanning
+  /// the list and removing entries from it.
+  SmallPtrSetImpl<LazyCallGraph::SCC *> &InvalidatedSCCs;
+
+  /// If non-null, the updated current \c RefSCC being processed.
+  ///
+  /// This is set when a graph refinement takes place and the "current" point
+  /// in the graph moves "down" or earlier in the post-order walk. This will
+  /// often cause the "current" RefSCC to be a newly created RefSCC object and
+  /// the old one to be added to the above worklist. When that happens, this
+  /// pointer is non-null and can be used to continue processing the "top" of
+  /// the post-order walk.
+  LazyCallGraph::RefSCC *UpdatedRC;
+
+  /// If non-null, the updated current \c SCC being processed.
+  ///
+  /// This is set when a graph refinement takes place and the "current" point
+  /// in the graph moves "down" or earlier in the post-order walk. This will
+  /// often cause the "current" SCC to be a newly created SCC object and the
+  /// old one to be added to the above worklist. When that happens, this
+  /// pointer is non-null and can be used to continue processing the "top" of
+  /// the post-order walk.
+  LazyCallGraph::SCC *UpdatedC;
+};
+
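To make the worklist contract concrete, a hedged sketch (assumed usage, mirroring the incorporateNewSCCRange helper added later in this patch). Here NewSCCs stands in for whatever postorder range a split produced, and OldC for the pre-split SCC; neither name comes from the patch.

    // Inside a pass's run method, after splitting the current SCC:
    UR.CWorklist.insert(&OldC);          // revisit the reshaped original SCC
    for (LazyCallGraph::SCC &NewC :
         reverse(make_range(std::next(NewSCCs.begin()), NewSCCs.end())))
      UR.CWorklist.insert(&NewC);        // queue the rest in reverse post-order
    UR.UpdatedC = &*NewSCCs.begin();     // the walk continues at the "bottom"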
 /// \brief The core module pass which does a post-order walk of the SCCs and
 /// runs a CGSCC pass over each one.
 ///
@@ -97,28 +276,101 @@
     // Get the call graph for this module.
     LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);
 
+    // We keep worklists to allow us to push more work onto the pass manager as
+    // the passes are run.
+    SmallPriorityWorklist<LazyCallGraph::RefSCC *, 1> RCWorklist;
+    SmallPriorityWorklist<LazyCallGraph::SCC *, 1> CWorklist;
+
+    // Keep sets for invalidated SCCs and RefSCCs that should be skipped when
+    // iterating off the worklists.
+    SmallPtrSet<LazyCallGraph::RefSCC *, 4> InvalidRefSCCSet;
+    SmallPtrSet<LazyCallGraph::SCC *, 4> InvalidSCCSet;
+
+    CGSCCUpdateResult UR = {RCWorklist, CWorklist, InvalidRefSCCSet,
+                            InvalidSCCSet, nullptr, nullptr};
+
     PreservedAnalyses PA = PreservedAnalyses::all();
-    for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
-      if (DebugLogging)
-        dbgs() << "Running an SCC pass across the RefSCC: " << RC << "\n";
-
-      for (LazyCallGraph::SCC &C : RC) {
-        PreservedAnalyses PassPA = Pass.run(C, CGAM);
-
-        // We know that the CGSCC pass couldn't have invalidated any other
-        // SCC's analyses (that's the contract of a CGSCC pass), so
-        // directly handle the CGSCC analysis manager's invalidation here. We
-        // also update the preserved set of analyses to reflect that invalidated
-        // analyses are now safe to preserve.
-        // FIXME: This isn't quite correct. We need to handle the case where the
-        // pass updated the CG, particularly some child of the current SCC, and
-        // invalidate its analyses.
-        PassPA = CGAM.invalidate(C, std::move(PassPA));
-
-        // Then intersect the preserved set so that invalidation of module
-        // analyses will eventually occur when the module pass completes.
-        PA.intersect(std::move(PassPA));
-      }
+    for (LazyCallGraph::RefSCC &InitialRC : CG.postorder_ref_sccs()) {
+      assert(RCWorklist.empty() &&
+             "Should always start with an empty RefSCC worklist");
+      // The postorder_ref_sccs range we are walking is lazily constructed, so
+      // we only push the first one onto the worklist. The worklist allows us
+      // to capture *new* RefSCCs created during transformations.
+      //
+      // We really want to form RefSCCs lazily because that makes them cheaper
+      // to update as the program is simplified and allows us to have greater
+      // cache locality as forming a RefSCC touches all the parts of all the
+      // functions within that RefSCC.
+      RCWorklist.insert(&InitialRC);
+
+      do {
+        LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val();
+        if (InvalidRefSCCSet.count(RC))
+          continue;
+
+        assert(CWorklist.empty() &&
+               "Should always start with an empty SCC worklist");
+
+        if (DebugLogging)
+          dbgs() << "Running an SCC pass across the RefSCC: " << *RC << "\n";
+
+        // Push the initial SCCs in reverse post-order as we'll pop off the
+        // back and so see this in post-order.
+        for (LazyCallGraph::SCC &C : reverse(*RC))
+          CWorklist.insert(&C);
+
+        do {
+          LazyCallGraph::SCC *C = CWorklist.pop_back_val();
+          // Due to call graph mutations, we may have invalid SCCs or SCCs from
+          // other RefSCCs in the worklist. The invalid ones are dead and the
+          // other RefSCCs should be queued above, so we just need to skip both
+          // scenarios here.
+          if (InvalidSCCSet.count(C) || &C->getOuterRefSCC() != RC)
+            continue;
+
+          do {
+            // Check that we didn't miss any update scenario.
+            assert(!InvalidSCCSet.count(C) && "Processing an invalid SCC!");
+            assert(C->begin() != C->end() && "Cannot have an empty SCC!");
+            assert(&C->getOuterRefSCC() == RC &&
+                   "Processing an SCC in a different RefSCC!");
+
+            UR.UpdatedRC = nullptr;
+            UR.UpdatedC = nullptr;
+            PreservedAnalyses PassPA = Pass.run(*C, CGAM, CG, UR);
+
+            // We handle invalidating the CGSCC analysis manager's information
+            // for the (potentially updated) SCC here. Note that any other SCCs
+            // whose structure has changed should have been invalidated by
+            // whatever was updating the call graph. This SCC gets invalidated
+            // late as it contains the nodes that were actively being
+            // processed.
+            PassPA = CGAM.invalidate(*(UR.UpdatedC ? UR.UpdatedC : C),
+                                     std::move(PassPA));
+
+            // Then intersect the preserved set so that invalidation of module
+            // analyses will eventually occur when the module pass completes.
+            PA.intersect(std::move(PassPA));
+
+            // The pass may have restructured the call graph and refined the
+            // current SCC and/or RefSCC. We need to update our current SCC and
+            // RefSCC pointers to follow these. Also, when the current SCC is
+            // refined, re-run the SCC pass over the newly refined SCC in order
+            // to observe the most precise SCC model available. This inherently
+            // cannot cycle excessively as it only happens when we split SCCs
+            // apart, at most converging on a DAG of single nodes.
+            // FIXME: If we ever start having RefSCC passes, we'll want to
+            // iterate there too.
+            RC = UR.UpdatedRC ? UR.UpdatedRC : RC;
+            C = UR.UpdatedC ? UR.UpdatedC : C;
+            if (DebugLogging && UR.UpdatedC)
+              dbgs() << "Re-running SCC passes after a refinement of the "
+                        "current SCC: "
+                     << *UR.UpdatedC << "\n";
+          } while (UR.UpdatedC);
+
+        } while (!CWorklist.empty());
+      } while (!RCWorklist.empty());
     }
 
     // By definition we preserve the proxy. This precludes *any* invalidation
@@ -142,10 +394,11 @@
   return ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>(std::move(Pass),
                                                        DebugLogging);
 }
 
-extern template class InnerAnalysisManagerProxy<FunctionAnalysisManager,
-                                                LazyCallGraph::SCC>;
+extern template class InnerAnalysisManagerProxy<
+    FunctionAnalysisManager, LazyCallGraph::SCC, LazyCallGraph &>;
 
 /// A proxy from a \c FunctionAnalysisManager to an \c SCC.
-typedef InnerAnalysisManagerProxy<FunctionAnalysisManager, LazyCallGraph::SCC>
+typedef InnerAnalysisManagerProxy<FunctionAnalysisManager, LazyCallGraph::SCC,
+                                  LazyCallGraph &>
     FunctionAnalysisManagerCGSCCProxy;
 
 extern template class OuterAnalysisManagerProxy<CGSCCAnalysisManager, Function>;
@@ -153,6 +406,16 @@
 typedef OuterAnalysisManagerProxy<CGSCCAnalysisManager, Function>
     CGSCCAnalysisManagerFunctionProxy;
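Before the update helper itself, a hedged sketch of how these adaptors and proxies compose in practice (assumed typical usage, not taken from the patch; it approximates the 'cgscc(function-attrs,function(simplify-cfg))' pipeline exercised by the tests below):

    CGSCCPassManager CGPM(/*DebugLogging=*/true);
    CGPM.addPass(PostOrderFunctionAttrsPass());

    FunctionPassManager FPM(/*DebugLogging=*/true);
    FPM.addPass(SimplifyCFGPass());
    // The adaptor wires up the proxies above and performs the call graph
    // update after each function is transformed.
    CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));

    ModulePassManager MPM(/*DebugLogging=*/true);
    MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));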
+/// Helper to update the call graph after running a function pass.
+///
+/// Function passes can only mutate the call graph in specific ways. This
+/// routine provides a helper that updates the call graph in those ways,
+/// returning the (potentially updated) SCC containing the function and
+/// populating a CG update result struct for the overall CGSCC walk.
+LazyCallGraph::SCC &updateCGAndAnalysisManagerForFunctionPass(
+    LazyCallGraph &G, LazyCallGraph::SCC &C, LazyCallGraph::Node &N,
+    CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, bool DebugLogging = false);
+
 /// \brief Adaptor that maps from a SCC to its functions.
 ///
 /// Designed to allow composition of a FunctionPass(Manager) and
@@ -185,37 +448,62 @@
   }
 
   /// \brief Runs the function pass across every function in the module.
-  PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM) {
+  PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
+                        LazyCallGraph &CG, CGSCCUpdateResult &UR) {
     // Setup the function analysis manager from its proxy.
     FunctionAnalysisManager &FAM =
-        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C).getManager();
+        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
+
+    SmallVector<LazyCallGraph::Node *, 4> Nodes;
+    for (LazyCallGraph::Node &N : C)
+      Nodes.push_back(&N);
+
+    // The SCC may get split while we are optimizing functions due to deleting
+    // edges. If this happens, the current SCC can shift, so keep track of
+    // a pointer we can overwrite.
+    LazyCallGraph::SCC *CurrentC = &C;
 
     if (DebugLogging)
       dbgs() << "Running function passes across an SCC: " << C << "\n";
 
     PreservedAnalyses PA = PreservedAnalyses::all();
-    for (LazyCallGraph::Node &N : C) {
-      PreservedAnalyses PassPA = Pass.run(N.getFunction(), FAM);
+    for (LazyCallGraph::Node *N : Nodes) {
+      // Skip nodes from other SCCs. These may have been split out during
+      // processing. We'll eventually visit those SCCs and pick up the nodes
+      // there.
+      if (CG.lookupSCC(*N) != CurrentC)
+        continue;
+
+      PreservedAnalyses PassPA = Pass.run(N->getFunction(), FAM);
 
       // We know that the function pass couldn't have invalidated any other
       // function's analyses (that's the contract of a function pass), so
       // directly handle the function analysis manager's invalidation here.
       // Also, update the preserved analyses to reflect that once invalidated
       // these can again be preserved.
-      PassPA = FAM.invalidate(N.getFunction(), std::move(PassPA));
+      PassPA = FAM.invalidate(N->getFunction(), std::move(PassPA));
 
       // Then intersect the preserved set so that invalidation of module
       // analyses will eventually occur when the module pass completes.
       PA.intersect(std::move(PassPA));
+
+      // Update the call graph based on this function pass. This may also
+      // update the current SCC to point to a smaller, more refined SCC.
+      CurrentC = &updateCGAndAnalysisManagerForFunctionPass(
+          CG, *CurrentC, *N, AM, UR, DebugLogging);
+      assert(CG.lookupSCC(*N) == CurrentC &&
+             "Current SCC not updated to the SCC containing the current node!");
     }
 
     // By definition we preserve the proxy. This precludes *any* invalidation
     // of function analyses by the proxy, but that's OK because we've taken
     // care to invalidate analyses in the function analysis manager
     // incrementally above.
-    // FIXME: We need to update the call graph here to account for any deleted
-    // edges!
     PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+
+    // We've also ensured that we updated the call graph along the way.
+    PA.preserve<LazyCallGraphAnalysis>();
+
     return PA;
   }
Index: llvm/trunk/include/llvm/Analysis/LazyCallGraph.h
===================================================================
--- llvm/trunk/include/llvm/Analysis/LazyCallGraph.h
+++ llvm/trunk/include/llvm/Analysis/LazyCallGraph.h
@@ -149,6 +149,9 @@
     /// around but clear them.
     operator bool() const;
 
+    /// Returns the \c Kind of the edge.
+    Kind getKind() const;
+
     /// Test whether the edge represents a direct call to a function.
     ///
     /// This requires that the edge is not null.
     bool isCall() const;
@@ -248,6 +251,11 @@
     }
     const Edge &operator[](Node &N) const { return (*this)[N.getFunction()]; }
 
+    const Edge *lookup(Function &F) const {
+      auto EI = EdgeIndexMap.find(&F);
+      return EI != EdgeIndexMap.end() ? &Edges[EI->second] : nullptr;
+    }
+
     call_edge_iterator call_begin() const {
       return call_edge_iterator(Edges.begin(), Edges.end());
     }
@@ -576,10 +584,14 @@
     /// the SCC of TargetN (previously the SCC of both). This preserves
     /// postorder as the TargetN can reach all of the other nodes by definition
     /// of previously being in a single SCC formed by the cycle from SourceN to
-    /// TargetN. The newly added nodes are added *immediately* and contiguously
-    /// prior to the TargetN SCC and so they may be iterated starting from
-    /// there.
-    void switchInternalEdgeToRef(Node &SourceN, Node &TargetN);
+    /// TargetN.
+    ///
+    /// The newly added SCCs are added *immediately* and contiguously prior to
+    /// the TargetN SCC, and the returned range covers the new SCCs in the
+    /// RefSCC's postorder sequence. You can directly iterate the returned
+    /// range to observe all of the new SCCs in postorder.
+    iterator_range<iterator> switchInternalEdgeToRef(Node &SourceN,
+                                                     Node &TargetN);
 
     /// Make an existing outgoing ref edge into a call edge.
     ///
@@ -830,6 +842,33 @@
 
   ///@}
 
+  ///@{
+  /// \name Static helpers for code doing updates to the call graph.
+  ///
+  /// These helpers are used to implement parts of the call graph but are also
+  /// useful to code doing updates or otherwise wanting to walk the IR in the
+  /// same patterns as when we build the call graph.
+
+  template <typename CallbackT>
+  static void visitReferences(SmallVectorImpl<Constant *> &Worklist,
+                              SmallPtrSetImpl<Constant *> &Visited,
+                              CallbackT Callback) {
+    while (!Worklist.empty()) {
+      Constant *C = Worklist.pop_back_val();
+
+      if (Function *F = dyn_cast<Function>(C)) {
+        Callback(*F);
+        continue;
+      }
+
+      for (Value *Op : C->operand_values())
+        if (Visited.insert(cast<Constant>(Op)).second)
+          Worklist.push_back(cast<Constant>(Op));
+    }
+  }
+
+  ///@}
+
 private:
   typedef SmallVectorImpl<Node *>::reverse_iterator node_stack_iterator;
   typedef iterator_range<node_stack_iterator> node_stack_range;
@@ -917,9 +956,14 @@
   return !Value.getPointer().isNull();
 }
 
+inline LazyCallGraph::Edge::Kind LazyCallGraph::Edge::getKind() const {
+  assert(*this && "Queried a null edge!");
+  return Value.getInt();
+}
+
 inline bool LazyCallGraph::Edge::isCall() const {
   assert(*this && "Queried a null edge!");
-  return Value.getInt() == Call;
+  return getKind() == Call;
 }
 
 inline Function &LazyCallGraph::Edge::getFunction() const {
Index: llvm/trunk/include/llvm/IR/PassManager.h
===================================================================
--- llvm/trunk/include/llvm/IR/PassManager.h
+++ llvm/trunk/include/llvm/IR/PassManager.h
@@ -380,12 +380,35 @@
     return AnalysisResults.empty();
   }
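The clear(IRUnitT &) entry point added below pairs naturally with IR deletion. A hedged sketch of the intended usage follows; the helper function is hypothetical, though the patch itself uses the same pattern via AM.clear for dead SCCs.

    // Before erasing a function known to be dead, drop every analysis result
    // pinned to it so the manager holds no dangling pointers into deleted IR.
    void eraseDeadFunction(Function &F, FunctionAnalysisManager &FAM) {
      FAM.clear(F);         // directly clears; no invalidation callbacks run
      F.eraseFromParent();  // now safe to delete the IR itself
    }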
+  /// \brief Clear any results for a single unit of IR.
+  ///
+  /// This doesn't invalidate but directly clears the results. It is useful
+  /// when the IR is being removed and we want to clear out all the memory
+  /// pinned for it.
+  void clear(IRUnitT &IR) {
+    if (DebugLogging)
+      dbgs() << "Clearing all analysis results for: " << IR.getName() << "\n";
+
+    // Clear all the results associated specifically with this unit of IR.
+    SmallVector<void *, 8> InvalidatedPassIDs;
+    auto ResultsListI = AnalysisResultLists.find(&IR);
+    if (ResultsListI == AnalysisResultLists.end())
+      return;
+    // Clear the map pointing into the results list.
+    for (auto &PassIDAndResult : ResultsListI->second)
+      AnalysisResults.erase(std::make_pair(PassIDAndResult.first, &IR));
+
+    // And actually destroy and erase the results associated with this IR.
+    AnalysisResultLists.erase(ResultsListI);
+  }
+
   /// \brief Clear the analysis result cache.
   ///
   /// This routine allows cleaning up when the set of IR units itself has
   /// potentially changed, and thus we can't even look up a result and
-  /// invalidate it directly. Notably, this does *not* call invalidate functions
-  /// as there is nothing to be done for them.
+  /// invalidate it directly. Notably, this does *not* call invalidate
+  /// functions as there is nothing to be done for them.
   void clear() {
     AnalysisResults.clear();
     AnalysisResultLists.clear();
Index: llvm/trunk/include/llvm/Transforms/IPO/FunctionAttrs.h
===================================================================
--- llvm/trunk/include/llvm/Transforms/IPO/FunctionAttrs.h
+++ llvm/trunk/include/llvm/Transforms/IPO/FunctionAttrs.h
@@ -30,7 +30,8 @@
 /// attribute. It also discovers function arguments that are not captured by
 /// the function and marks them with the nocapture attribute.
 struct PostOrderFunctionAttrsPass : PassInfoMixin<PostOrderFunctionAttrsPass> {
-  PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM);
+  PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
+                        LazyCallGraph &CG, CGSCCUpdateResult &UR);
 };
 
 /// Create a legacy pass manager instance of a pass to compute function attrs
Index: llvm/trunk/lib/Analysis/CGSCCPassManager.cpp
===================================================================
--- llvm/trunk/lib/Analysis/CGSCCPassManager.cpp
+++ llvm/trunk/lib/Analysis/CGSCCPassManager.cpp
@@ -8,17 +8,354 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/IR/CallSite.h"
 
 using namespace llvm;
 
-// Explicit instantiations for the core proxy templates.
 namespace llvm {
-template class PassManager<LazyCallGraph::SCC>;
-template class AnalysisManager<LazyCallGraph::SCC>;
+
+// Explicit instantiations for the core proxy templates.
+template class AnalysisManager<LazyCallGraph::SCC, LazyCallGraph &>;
+template class PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager,
+                           LazyCallGraph &, CGSCCUpdateResult &>;
 template class InnerAnalysisManagerProxy<CGSCCAnalysisManager, Module>;
 template class OuterAnalysisManagerProxy<ModuleAnalysisManager,
-                                         LazyCallGraph::SCC>;
+                                         LazyCallGraph::SCC, LazyCallGraph &>;
 template class InnerAnalysisManagerProxy<FunctionAnalysisManager,
-                                         LazyCallGraph::SCC>;
+                                         LazyCallGraph::SCC, LazyCallGraph &>;
 template class OuterAnalysisManagerProxy<CGSCCAnalysisManager, Function>;
+
+/// Explicitly specialize the pass manager run method to handle call graph
+/// updates.
+template <>
+PreservedAnalyses
+PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
+            CGSCCUpdateResult &>::run(LazyCallGraph::SCC &InitialC,
+                                      CGSCCAnalysisManager &AM,
+                                      LazyCallGraph &G, CGSCCUpdateResult &UR) {
+  PreservedAnalyses PA = PreservedAnalyses::all();
+
+  if (DebugLogging)
+    dbgs() << "Starting CGSCC pass manager run.\n";
+
+  // The SCC may be refined while we are running passes over it, so set up
+  // a pointer that we can update.
+  LazyCallGraph::SCC *C = &InitialC;
+
+  for (auto &Pass : Passes) {
+    if (DebugLogging)
+      dbgs() << "Running pass: " << Pass->name() << " on " << *C << "\n";
+
+    PreservedAnalyses PassPA = Pass->run(*C, AM, G, UR);
+
+    // Update the SCC if necessary.
+    C = UR.UpdatedC ? UR.UpdatedC : C;
+
+    // Check that we didn't miss any update scenario.
+    assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!");
+    assert(C->begin() != C->end() && "Cannot have an empty SCC!");
+
+    // Update the analysis manager as each pass runs and potentially
+    // invalidates analyses. We also update the preserved set of analyses
+    // based on what analyses we have already handled the invalidation for
+    // here and don't need to invalidate when finished.
+    PassPA = AM.invalidate(*C, std::move(PassPA));
+
+    // Finally, we intersect the final preserved analyses to compute the
+    // aggregate preserved set for this pass manager.
+    PA.intersect(std::move(PassPA));
+
+    // FIXME: Historically, the pass managers all called the LLVM context's
+    // yield function here. We don't have a generic way to acquire the
+    // context and it isn't yet clear what the right pattern is for yielding
+    // in the new pass manager so it is currently omitted.
+    // ...getContext().yield();
+  }
+
+  if (DebugLogging)
+    dbgs() << "Finished CGSCC pass manager run.\n";
+
+  return PA;
+}
+
+} // End llvm namespace
+
+namespace {
+/// Helper function to update both the \c CGSCCAnalysisManager \p AM and the \c
+/// CGSCCPassManager's \c CGSCCUpdateResult \p UR based on a range of newly
+/// added SCCs.
+///
+/// The range of new SCCs must be in postorder already. The SCC they were split
+/// out of must be provided as \p C. The current node being mutated and
+/// triggering updates must be passed as \p N.
+///
+/// This function returns the SCC containing \p N. This will be either \p C if
+/// no new SCCs have been split out, or it will be the new SCC containing \p N.
+template <typename SCCRangeT>
+LazyCallGraph::SCC *
+incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G,
+                       LazyCallGraph::Node &N, LazyCallGraph::SCC *C,
+                       CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR,
+                       bool DebugLogging = false) {
+  typedef LazyCallGraph::SCC SCC;
+
+  if (NewSCCRange.begin() == NewSCCRange.end())
+    return C;
+
+  // Invalidate the analyses of the current SCC and add it to the worklist
+  // since it has changed its shape.
+  AM.invalidate(*C, PreservedAnalyses::none());
+  UR.CWorklist.insert(C);
+  if (DebugLogging)
+    dbgs() << "Enqueuing the existing SCC in the worklist:" << *C << "\n";
+
+  SCC *OldC = C;
+  (void)OldC;
+
+  // Update the current SCC. Note that if we have new SCCs, this must actually
+  // change the SCC.
+  assert(C != &*NewSCCRange.begin() &&
+         "Cannot insert new SCCs without changing current SCC!");
+  C = &*NewSCCRange.begin();
+  assert(G.lookupSCC(N) == C && "Failed to update current SCC!");
+
+  for (SCC &NewC : reverse(
+           make_range(std::next(NewSCCRange.begin()), NewSCCRange.end()))) {
+    assert(C != &NewC && "No need to re-visit the current SCC!");
+    assert(OldC != &NewC && "Already handled the original SCC!");
+    UR.CWorklist.insert(&NewC);
+    if (DebugLogging)
+      dbgs() << "Enqueuing a newly formed SCC:" << NewC << "\n";
+  }
+  return C;
+}
+}
+
+LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
+    LazyCallGraph &G, LazyCallGraph::SCC &InitialC, LazyCallGraph::Node &N,
+    CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, bool DebugLogging) {
+  typedef LazyCallGraph::Node Node;
+  typedef LazyCallGraph::Edge Edge;
+  typedef LazyCallGraph::SCC SCC;
+  typedef LazyCallGraph::RefSCC RefSCC;
+
+  RefSCC &InitialRC = InitialC.getOuterRefSCC();
+  SCC *C = &InitialC;
+  RefSCC *RC = &InitialRC;
+  Function &F = N.getFunction();
+
+  // Walk the function body and build up the set of retained, promoted, and
+  // demoted edges.
+  SmallVector<Constant *, 16> Worklist;
+  SmallPtrSet<Constant *, 16> Visited;
+  SmallPtrSet<Function *, 16> RetainedEdges;
+  SmallSetVector<Function *, 4> PromotedRefTargets;
+  SmallSetVector<Function *, 4> DemotedCallTargets;
+  // First walk the function and handle all called functions. We do this first
+  // because if there is a single call edge, whether there are ref edges is
+  // irrelevant.
+  for (BasicBlock &BB : F)
+    for (Instruction &I : BB)
+      if (auto CS = CallSite(&I))
+        if (Function *Callee = CS.getCalledFunction())
+          if (Visited.insert(Callee).second && !Callee->isDeclaration()) {
+            const Edge *E = N.lookup(*Callee);
+            // FIXME: We should really handle adding new calls. While it will
+            // make downstream usage more complex, there is no fundamental
+            // limitation and it will allow passes within the CGSCC to be a bit
+            // more flexible in what transforms they can do. Until then, we
+            // verify that new calls haven't been introduced.
+            assert(E && "No function transformations should introduce *new* "
+                        "call edges! Any new calls should be modeled as "
+                        "promoted existing ref edges!");
+            RetainedEdges.insert(Callee);
+            if (!E->isCall())
+              PromotedRefTargets.insert(Callee);
+          }
+
+  // Now walk all references.
+  for (BasicBlock &BB : F)
+    for (Instruction &I : BB) {
+      for (Value *Op : I.operand_values())
+        if (Constant *C = dyn_cast<Constant>(Op))
+          if (Visited.insert(C).second)
+            Worklist.push_back(C);
+
+      LazyCallGraph::visitReferences(Worklist, Visited, [&](Function &Referee) {
+        // Skip declarations.
+        if (Referee.isDeclaration())
+          return;
+
+        const Edge *E = N.lookup(Referee);
+        // FIXME: Similarly to new calls, we also currently preclude
+        // introducing new references. See above for details.
+        assert(E && "No function transformations should introduce *new* ref "
+                    "edges! Any new ref edges would require IPO which "
+                    "function passes aren't allowed to do!");
+        RetainedEdges.insert(&Referee);
+        if (E->isCall())
+          DemotedCallTargets.insert(&Referee);
+      });
+    }
+
+  // First remove all of the edges that are no longer present in this function.
+  // We have to build a list of dead targets first and then remove them as the
+  // data structures will all be invalidated by removing them.
+  SmallVector<PointerIntPair<Node *, 1, Edge::Kind>, 4> DeadTargets;
+  for (Edge &E : N)
+    if (!RetainedEdges.count(&E.getFunction()))
+      DeadTargets.push_back({E.getNode(), E.getKind()});
+  for (auto DeadTarget : DeadTargets) {
+    Node &TargetN = *DeadTarget.getPointer();
+    bool IsCall = DeadTarget.getInt() == Edge::Call;
+    SCC &TargetC = *G.lookupSCC(TargetN);
+    RefSCC &TargetRC = TargetC.getOuterRefSCC();
+
+    if (&TargetRC != RC) {
+      RC->removeOutgoingEdge(N, TargetN);
+      if (DebugLogging)
+        dbgs() << "Deleting outgoing edge from '" << N << "' to '" << TargetN
+               << "'\n";
+      continue;
+    }
+    if (DebugLogging)
+      dbgs() << "Deleting internal " << (IsCall ? "call" : "ref")
+             << " edge from '" << N << "' to '" << TargetN << "'\n";
+
+    if (IsCall)
+      C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, TargetN), G, N,
+                                 C, AM, UR, DebugLogging);
+
+    auto NewRefSCCs = RC->removeInternalRefEdge(N, TargetN);
+    if (!NewRefSCCs.empty()) {
+      // Note that we don't bother to invalidate analyses as ref-edge
+      // connectivity is not really observable in any way and is intended
+      // exclusively to be used for ordering of transforms rather than for
+      // analysis conclusions.
+
+      // The RC worklist is in reverse postorder, so we first enqueue the
+      // current RefSCC as it will remain the parent of all split RefSCCs, then
+      // we enqueue the new ones in RPO except for the one which contains the
+      // source node as that is the "bottom" we will continue processing in the
+      // bottom-up walk.
+      UR.RCWorklist.insert(RC);
+      if (DebugLogging)
+        dbgs() << "Enqueuing the existing RefSCC in the update worklist: "
+               << *RC << "\n";
+      // Update the RC to the "bottom".
+      assert(G.lookupSCC(N) == C && "Changed the SCC when splitting RefSCCs!");
+      RC = &C->getOuterRefSCC();
+      assert(G.lookupRefSCC(N) == RC && "Failed to update current RefSCC!");
+      for (RefSCC *NewRC : reverse(NewRefSCCs))
+        if (NewRC != RC) {
+          UR.RCWorklist.insert(NewRC);
+          if (DebugLogging)
+            dbgs() << "Enqueuing a new RefSCC in the update worklist: "
                   << *NewRC << "\n";
+        }
+    }
+  }
+
+  // Next demote all the call edges that are now ref edges. This helps make
+  // the SCCs small which should minimize the work below as we don't want to
+  // form cycles that this would break.
+  for (Function *RefTarget : DemotedCallTargets) {
+    Node &TargetN = *G.lookup(*RefTarget);
+    SCC &TargetC = *G.lookupSCC(TargetN);
+    RefSCC &TargetRC = TargetC.getOuterRefSCC();
+
+    // The easy case is when the target RefSCC is not this RefSCC. This is
+    // only supported when the target RefSCC is a child of this RefSCC.
+    if (&TargetRC != RC) {
+      assert(RC->isAncestorOf(TargetRC) &&
+             "Cannot potentially form RefSCC cycles here!");
+      RC->switchOutgoingEdgeToRef(N, TargetN);
+      if (DebugLogging)
+        dbgs() << "Switch outgoing call edge to a ref edge from '" << N
+               << "' to '" << TargetN << "'\n";
+      continue;
+    }
+
+    // Otherwise we are switching an internal call edge to a ref edge. This
+    // may split up some SCCs.
+    C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, TargetN), G, N, C,
+                               AM, UR, DebugLogging);
+  }
+
+  // Now promote ref edges into call edges.
+  for (Function *CallTarget : PromotedRefTargets) {
+    Node &TargetN = *G.lookup(*CallTarget);
+    SCC &TargetC = *G.lookupSCC(TargetN);
+    RefSCC &TargetRC = TargetC.getOuterRefSCC();
+
+    // The easy case is when the target RefSCC is not this RefSCC. This is
+    // only supported when the target RefSCC is a child of this RefSCC.
+    if (&TargetRC != RC) {
+      assert(RC->isAncestorOf(TargetRC) &&
+             "Cannot potentially form RefSCC cycles here!");
+      RC->switchOutgoingEdgeToCall(N, TargetN);
+      if (DebugLogging)
+        dbgs() << "Switch outgoing ref edge to a call edge from '" << N
+               << "' to '" << TargetN << "'\n";
+      continue;
+    }
+    if (DebugLogging)
+      dbgs() << "Switch an internal ref edge to a call edge from '" << N
+             << "' to '" << TargetN << "'\n";
+
+    // Otherwise we are switching an internal ref edge to a call edge. This
+    // may merge away some SCCs, and we add those to the UpdateResult. We also
+    // need to make sure to update the worklist in the event SCCs have moved
+    // before the current one in the post-order sequence.
+    auto InitialSCCIndex = RC->find(*C) - RC->begin();
+    auto InvalidatedSCCs = RC->switchInternalEdgeToCall(N, TargetN);
+    if (!InvalidatedSCCs.empty()) {
+      C = &TargetC;
+      assert(G.lookupSCC(N) == C && "Failed to update current SCC!");
+
+      // Any analyses cached for this SCC are no longer precise as the shape
+      // has changed by introducing this cycle.
+      AM.invalidate(*C, PreservedAnalyses::none());
+
+      for (SCC *InvalidatedC : InvalidatedSCCs) {
+        assert(InvalidatedC != C && "Cannot invalidate the current SCC!");
+        UR.InvalidatedSCCs.insert(InvalidatedC);
+
+        // Also clear any cached analyses for the SCCs that are dead. This
+        // isn't really necessary for correctness but can release memory.
+        AM.clear(*InvalidatedC);
+      }
+    }
+    auto NewSCCIndex = RC->find(*C) - RC->begin();
+    if (InitialSCCIndex < NewSCCIndex) {
+      // Put our current SCC back onto the worklist as we'll visit other SCCs
+      // that are now definitively ordered prior to the current one in the
+      // post-order sequence, and may end up observing more precise context to
+      // optimize the current SCC.
+      UR.CWorklist.insert(C);
+      if (DebugLogging)
+        dbgs() << "Enqueuing the existing SCC in the worklist: " << *C << "\n";
+      // Enqueue in reverse order as we pop off the back of the worklist.
+      for (SCC &MovedC : reverse(make_range(RC->begin() + InitialSCCIndex,
+                                            RC->begin() + NewSCCIndex))) {
+        UR.CWorklist.insert(&MovedC);
+        if (DebugLogging)
+          dbgs() << "Enqueuing a newly earlier in post-order SCC: " << MovedC
+                 << "\n";
+      }
+    }
+  }
+
+  assert(!UR.InvalidatedSCCs.count(C) && "Invalidated the current SCC!");
+  assert(!UR.InvalidatedRefSCCs.count(RC) && "Invalidated the current RefSCC!");
+  assert(&C->getOuterRefSCC() == RC && "Current SCC not in current RefSCC!");
+
+  // Record the current RefSCC and SCC for higher layers of the CGSCC pass
+  // manager now that all the updates have been applied.
+  if (RC != &InitialRC)
+    UR.UpdatedRC = RC;
+  if (C != &InitialC)
+    UR.UpdatedC = C;
+
+  return *C;
 }
Index: llvm/trunk/lib/Analysis/LazyCallGraph.cpp
===================================================================
--- llvm/trunk/lib/Analysis/LazyCallGraph.cpp
+++ llvm/trunk/lib/Analysis/LazyCallGraph.cpp
@@ -40,24 +40,6 @@
   }
 }
 
-static void findReferences(SmallVectorImpl<Constant *> &Worklist,
-                           SmallPtrSetImpl<Constant *> &Visited,
-                           SmallVectorImpl<LazyCallGraph::Edge> &Edges,
-                           DenseMap<Function *, int> &EdgeIndexMap) {
-  while (!Worklist.empty()) {
-    Constant *C = Worklist.pop_back_val();
-
-    if (Function *F = dyn_cast<Function>(C)) {
-      addEdge(Edges, EdgeIndexMap, *F, LazyCallGraph::Edge::Ref);
-      continue;
-    }
-
-    for (Value *Op : C->operand_values())
-      if (Visited.insert(cast<Constant>(Op)).second)
-        Worklist.push_back(cast<Constant>(Op));
-  }
-}
-
 LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F)
     : G(&G), F(F), DFSNumber(0), LowLink(0) {
   DEBUG(dbgs() << "  Adding functions called by '" << F.getName()
@@ -90,7 +72,9 @@
   // We've collected all the constant (and thus potentially function or
   // function containing) operands to all of the instructions in the function.
   // Process them (recursively) collecting every function found.
-  findReferences(Worklist, Visited, Edges, EdgeIndexMap);
+  visitReferences(Worklist, Visited, [&](Function &F) {
+    addEdge(Edges, EdgeIndexMap, F, LazyCallGraph::Edge::Ref);
+  });
 }
 
 void LazyCallGraph::Node::insertEdgeInternal(Function &Target, Edge::Kind EK) {
@@ -144,7 +128,9 @@
   DEBUG(dbgs() << "  Adding functions referenced by global initializers to the "
                   "entry set.\n");
-  findReferences(Worklist, Visited, EntryEdges, EntryIndexMap);
+  visitReferences(Worklist, Visited, [&](Function &F) {
+    addEdge(EntryEdges, EntryIndexMap, F, LazyCallGraph::Edge::Ref);
+  });
 
   for (const Edge &E : EntryEdges)
     RefSCCEntryNodes.push_back(&E.getFunction());
@@ -211,11 +197,14 @@
   assert(!SCCs.empty() && "Can't have an empty SCC!");
 
   // Verify basic properties of the SCCs.
+  SmallPtrSet<SCC *, 4> SCCSet;
   for (SCC *C : SCCs) {
     assert(C && "Can't have a null SCC!");
     C->verify();
     assert(&C->getOuterRefSCC() == this &&
            "SCC doesn't think it is inside this RefSCC!");
+    bool Inserted = SCCSet.insert(C).second;
+    assert(Inserted && "Found a duplicate SCC!");
   }
 
   // Check that our indices map correctly.
@@ -223,6 +212,7 @@
     SCC *C = SCCIndexPair.first;
     int i = SCCIndexPair.second;
     assert(C && "Can't have a null SCC in the indices!");
+    assert(SCCSet.count(C) && "Found an index for an SCC not in the RefSCC!");
     assert(SCCs[i] == C && "Index doesn't point to SCC!");
   }
 
@@ -478,8 +468,8 @@
   return DeletedSCCs;
 }
 
-void LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN,
-                                                    Node &TargetN) {
+iterator_range<LazyCallGraph::RefSCC::iterator>
+LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) {
   assert(SourceN[TargetN].isCall() && "Must start with a call edge!");
 
   SCC &SourceSCC = *G->lookupSCC(SourceN);
@@ -500,7 +490,7 @@
     // Check that the RefSCC is still valid.
     verify();
 #endif
-    return;
+    return make_range(SCCs.end(), SCCs.end());
   }
 
   // Otherwise we are removing a call edge from a single SCC. This may break
@@ -666,6 +656,9 @@
   // We're done. Check the validity on our way out.
   verify();
 #endif
+
+  return make_range(SCCs.begin() + OldIdx,
+                    SCCs.begin() + OldIdx + NewSCCs.size());
 }
 
 void LazyCallGraph::RefSCC::switchOutgoingEdgeToCall(Node &SourceN,
@@ -1181,6 +1174,9 @@
                              RootPostOrderNumber;
                     }),
       SCCs.end());
+  SCCIndices.clear();
+  for (int i = 0, Size = SCCs.size(); i < Size; ++i)
+    SCCIndices[SCCs[i]] = i;
 
 #ifndef NDEBUG
   // Now we need to reconnect the current (root) SCC to the graph. We do this
Index: llvm/trunk/lib/Passes/PassBuilder.cpp
===================================================================
--- llvm/trunk/lib/Passes/PassBuilder.cpp
+++ llvm/trunk/lib/Passes/PassBuilder.cpp
@@ -159,8 +159,8 @@
 
 /// \brief No-op CGSCC pass which does nothing.
 struct NoOpCGSCCPass {
-  PreservedAnalyses run(LazyCallGraph::SCC &C,
-                        CGSCCAnalysisManager &) {
+  PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &,
+                        LazyCallGraph &, CGSCCUpdateResult &UR) {
     return PreservedAnalyses::all();
   }
   static StringRef name() { return "NoOpCGSCCPass"; }
@@ -173,7 +173,7 @@
 public:
   struct Result {};
 
-  Result run(LazyCallGraph::SCC &, CGSCCAnalysisManager &) {
+  Result run(LazyCallGraph::SCC &, CGSCCAnalysisManager &, LazyCallGraph &G) {
     return Result();
   }
   static StringRef name() { return "NoOpCGSCCAnalysis"; }
@@ -580,7 +580,8 @@
   if (Name == "require<" NAME ">") {                                          \
     CGPM.addPass(RequireAnalysisPass<                                         \
                  std::remove_reference<decltype(CREATE_PASS)>::type,          \
-                 LazyCallGraph::SCC>());                                      \
+                 LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,   \
+                 CGSCCUpdateResult &>());                                     \
     return true;                                                              \
   }                                                                          \
   if (Name == "invalidate<" NAME ">") {                                      \
Index: llvm/trunk/lib/Transforms/IPO/FunctionAttrs.cpp
===================================================================
--- llvm/trunk/lib/Transforms/IPO/FunctionAttrs.cpp
+++ llvm/trunk/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -1031,9 +1031,11 @@
 }
 
 PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
-                                                  CGSCCAnalysisManager &AM) {
+                                                  CGSCCAnalysisManager &AM,
+                                                  LazyCallGraph &CG,
+                                                  CGSCCUpdateResult &) {
   FunctionAnalysisManager &FAM =
-      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C).getManager();
+      AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
 
   // We pass a lambda into functions to wire them up to the analysis manager
   // for getting function analyses.
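The tests that follow drive all of the above end-to-end through textual pipelines. A hedged sketch of the equivalent programmatic setup (assuming the PassBuilder API of this vintage, where parsePassPipeline reports failure through its bool result; the analysis-manager registration and proxy cross-wiring are elided):

    PassBuilder PB;
    ModulePassManager MPM(/*DebugLogging=*/true);
    // Assumed embedding: parse the same pipeline string the tests below use.
    if (!PB.parsePassPipeline(MPM, "cgscc(function-attrs,function(simplify-cfg))"))
      report_fatal_error("failed to parse CGSCC pipeline");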
Index: llvm/trunk/test/Other/cgscc-iterate-function-mutation.ll =================================================================== --- llvm/trunk/test/Other/cgscc-iterate-function-mutation.ll +++ llvm/trunk/test/Other/cgscc-iterate-function-mutation.ll @@ -0,0 +1,341 @@ +; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(function-attrs,function(simplify-cfg))' -S < %s | FileCheck %s + +declare void @readnone() readnone +declare void @unknown() +declare void @reference_function_pointer(void()*) readnone + +; The @test1_* set of functions checks that when we mutate functions with +; simplify-cfg to delete call edges and this ends up splitting both the SCCs +; and the RefSCCs that those functions are in, we re-run the CGSCC passes to +; observe the refined call graph structure. + +; CHECK: define void @test1_a() { +define void @test1_a() { + call void @test1_b1() + call void @test1_b2() + call void @test1_b3() + call void @test1_b4() + ret void +} + +; CHECK: define void @test1_b1() #0 { +define void @test1_b1() { + call void @readnone() + ret void +} + +; CHECK: define void @test1_b2() #0 { +define void @test1_b2() { + call void @readnone() + br i1 false, label %dead, label %exit + +dead: + call void @test1_a() + br label %exit + +exit: + ret void +} + +; CHECK: define void @test1_b3() { +define void @test1_b3() { + call void @unknown() + br i1 false, label %dead, label %exit + +dead: + call void @test1_a() + br label %exit + +exit: + ret void +} + +; CHECK: define void @test1_b4() #0 { +define void @test1_b4() { + call void @readnone() + br i1 false, label %dead, label %exit + +dead: + call void @test1_a() + br label %exit + +exit: + ret void +} + + +; The @test2_* set of functions provide similar checks to @test1_* but only +; splitting the SCCs while leaving the RefSCC intact. This is accomplished by +; having dummy ref edges to the root function. + +; CHECK: define void @test2_a() { +define void @test2_a() { + call void @test2_b1() + call void @test2_b2() + call void @test2_b3() + call void @test2_b4() + ret void +} + +; CHECK: define void @test2_b1() #0 { +define void @test2_b1() { + call void @readnone() + ret void +} + +; CHECK: define void @test2_b2() #0 { +define void @test2_b2() { + call void @reference_function_pointer(void()* @test2_a) + br i1 false, label %dead, label %exit + +dead: + call void @test2_a() + br label %exit + +exit: + ret void +} + +; CHECK: define void @test2_b3() { +define void @test2_b3() { + call void @reference_function_pointer(void()* @test2_a) + call void @unknown() + br i1 false, label %dead, label %exit + +dead: + call void @test2_a() + br label %exit + +exit: + ret void +} + +; CHECK: define void @test2_b4() #0 { +define void @test2_b4() { + call void @reference_function_pointer(void()* @test2_a) + br i1 false, label %dead, label %exit + +dead: + call void @test2_a() + br label %exit + +exit: + ret void +} + + +; The @test3_* set of functions are the same challenge as @test1_* but with +; multiple layers that have to be traversed in the correct order instead of +; a single node. 
+ +; CHECK: define void @test3_a() { +define void @test3_a() { + call void @test3_b11() + call void @test3_b21() + call void @test3_b31() + call void @test3_b41() + ret void +} + +; CHECK: define void @test3_b11() #0 { +define void @test3_b11() { + call void @test3_b12() + ret void +} + +; CHECK: define void @test3_b12() #0 { +define void @test3_b12() { + call void @test3_b13() + ret void +} + +; CHECK: define void @test3_b13() #0 { +define void @test3_b13() { + call void @readnone() + ret void +} + +; CHECK: define void @test3_b21() #0 { +define void @test3_b21() { + call void @test3_b22() + ret void +} + +; CHECK: define void @test3_b22() #0 { +define void @test3_b22() { + call void @test3_b23() + ret void +} + +; CHECK: define void @test3_b23() #0 { +define void @test3_b23() { + call void @readnone() + br i1 false, label %dead, label %exit + +dead: + call void @test3_a() + br label %exit + +exit: + ret void +} + +; CHECK: define void @test3_b31() { +define void @test3_b31() { + call void @test3_b32() + ret void +} + +; CHECK: define void @test3_b32() { +define void @test3_b32() { + call void @test3_b33() + ret void +} + +; CHECK: define void @test3_b33() { +define void @test3_b33() { + call void @unknown() + br i1 false, label %dead, label %exit + +dead: + call void @test3_a() + br label %exit + +exit: + ret void +} + +; CHECK: define void @test3_b41() #0 { +define void @test3_b41() { + call void @test3_b42() + ret void +} + +; CHECK: define void @test3_b42() #0 { +define void @test3_b42() { + call void @test3_b43() + ret void +} + +; CHECK: define void @test3_b43() #0 { +define void @test3_b43() { + call void @readnone() + br i1 false, label %dead, label %exit + +dead: + call void @test3_a() + br label %exit + +exit: + ret void +} + + +; The @test4_* functions exercise the same core challenge as the @test2_* +; functions, but again include long chains instead of single nodes and ensure +; we traverse the chains in the correct order. 
+ +; CHECK: define void @test4_a() { +define void @test4_a() { + call void @test4_b11() + call void @test4_b21() + call void @test4_b31() + call void @test4_b41() + ret void +} + +; CHECK: define void @test4_b11() #0 { +define void @test4_b11() { + call void @test4_b12() + ret void +} + +; CHECK: define void @test4_b12() #0 { +define void @test4_b12() { + call void @test4_b13() + ret void +} + +; CHECK: define void @test4_b13() #0 { +define void @test4_b13() { + call void @readnone() + ret void +} + +; CHECK: define void @test4_b21() #0 { +define void @test4_b21() { + call void @test4_b22() + ret void +} + +; CHECK: define void @test4_b22() #0 { +define void @test4_b22() { + call void @test4_b23() + ret void +} + +; CHECK: define void @test4_b23() #0 { +define void @test4_b23() { + call void @reference_function_pointer(void()* @test4_a) + br i1 false, label %dead, label %exit + +dead: + call void @test4_a() + br label %exit + +exit: + ret void +} + +; CHECK: define void @test4_b31() { +define void @test4_b31() { + call void @test4_b32() + ret void +} + +; CHECK: define void @test4_b32() { +define void @test4_b32() { + call void @test4_b33() + ret void +} + +; CHECK: define void @test4_b33() { +define void @test4_b33() { + call void @reference_function_pointer(void()* @test4_a) + call void @unknown() + br i1 false, label %dead, label %exit + +dead: + call void @test4_a() + br label %exit + +exit: + ret void +} + +; CHECK: define void @test4_b41() #0 { +define void @test4_b41() { + call void @test4_b42() + ret void +} + +; CHECK: define void @test4_b42() #0 { +define void @test4_b42() { + call void @test4_b43() + ret void +} + +; CHECK: define void @test4_b43() #0 { +define void @test4_b43() { + call void @reference_function_pointer(void()* @test4_a) + br i1 false, label %dead, label %exit + +dead: + call void @test4_a() + br label %exit + +exit: + ret void +} + +; CHECK: attributes #0 = { readnone } Index: llvm/trunk/test/Other/cgscc-observe-devirt.ll =================================================================== --- llvm/trunk/test/Other/cgscc-observe-devirt.ll +++ llvm/trunk/test/Other/cgscc-observe-devirt.ll @@ -0,0 +1,133 @@ +; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(function-attrs)' -S < %s | FileCheck %s --check-prefix=BEFORE +; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(function-attrs,function(gvn))' -S < %s | FileCheck %s --check-prefix=AFTER +; +; Also check that adding an extra CGSCC pass after the function update but +; without requiring the outer manager to iterate doesn't break any invariant. +; RUN: opt -aa-pipeline=basic-aa -passes='cgscc(function-attrs,function(gvn),function-attrs)' -S < %s | FileCheck %s --check-prefix=AFTER2 + +declare void @readnone() readnone +declare void @unknown() + +; The @test1_* functions check that when we refine an indirect call to a direct +; call, even if it doesn't change the call graph structure, we revisit the SCC +; passes to reflect the more precise information. +; FIXME: Currently, this isn't implemented in the new pass manager and so we +; only get this with AFTER2, not with AFTER. 
+ +; BEFORE: define void @test1_a() { +; AFTER: define void @test1_a() { +; AFTER2: define void @test1_a() { +define void @test1_a() { + %fptr = alloca void()* + store void()* @unknown, void()** %fptr + %f = load void()*, void()** %fptr + call void %f() + ret void +} + +; BEFORE: define void @test1_b() { +; AFTER: define void @test1_b() { +; AFTER2: define void @test1_b() #0 { +define void @test1_b() { + %fptr = alloca void()* + store void()* @readnone, void()** %fptr + %f = load void()*, void()** %fptr + call void %f() + ret void +} + +; The @test2_* checks that if we refine an indirect call to a direct call and +; in the process change the very structure of the call graph we also revisit +; that component of the graph and do so in an up-to-date fashion. + +; BEFORE: define void @test2_a1() { +; AFTER: define void @test2_a1() { +; AFTER2: define void @test2_a1() { +define void @test2_a1() { + %fptr = alloca void()* + store void()* @test2_b2, void()** %fptr + store void()* @test2_b1, void()** %fptr + %f = load void()*, void()** %fptr + call void %f() + ret void +} + +; BEFORE: define void @test2_b1() { +; AFTER: define void @test2_b1() { +; AFTER2: define void @test2_b1() { +define void @test2_b1() { + call void @unknown() + call void @test2_a1() + ret void +} + +; BEFORE: define void @test2_a2() { +; AFTER: define void @test2_a2() #0 { +; AFTER2: define void @test2_a2() #0 { +define void @test2_a2() { + %fptr = alloca void()* + store void()* @test2_b1, void()** %fptr + store void()* @test2_b2, void()** %fptr + %f = load void()*, void()** %fptr + call void %f() + ret void +} + +; BEFORE: define void @test2_b2() { +; AFTER: define void @test2_b2() #0 { +; AFTER2: define void @test2_b2() #0 { +define void @test2_b2() { + call void @readnone() + call void @test2_a2() + ret void +} + + +; The @test3_* set of functions exercise a case where running function passes +; introduces a new post-order relationship that was not present originally and +; makes sure we walk across the SCCs in that order. 
+ +; CHECK: define void @test3_a() { +define void @test3_a() { + call void @test3_b1() + call void @test3_b2() + call void @test3_b3() + call void @unknown() + ret void +} + +; CHECK: define void @test3_b1() #0 { +define void @test3_b1() { + %fptr = alloca void()* + store void()* @test3_a, void()** %fptr + store void()* @readnone, void()** %fptr + %f = load void()*, void()** %fptr + call void %f() + ret void +} + +; CHECK: define void @test3_b2() #0 { +define void @test3_b2() { + %fptr = alloca void()* + store void()* @test3_a, void()** %fptr + store void()* @test3_b2, void()** %fptr + store void()* @test3_b3, void()** %fptr + store void()* @test3_b1, void()** %fptr + %f = load void()*, void()** %fptr + call void %f() + ret void +} + +; CHECK: define void @test3_b3() #0 { +define void @test3_b3() { + %fptr = alloca void()* + store void()* @test3_a, void()** %fptr + store void()* @test3_b2, void()** %fptr + store void()* @test3_b3, void()** %fptr + store void()* @test3_b1, void()** %fptr + %f = load void()*, void()** %fptr + call void %f() + ret void +} + +; CHECK: attributes #0 = { readnone } Index: llvm/trunk/test/Other/new-pass-manager.ll =================================================================== --- llvm/trunk/test/Other/new-pass-manager.ll +++ llvm/trunk/test/Other/new-pass-manager.ll @@ -23,9 +23,9 @@ ; CHECK-CGSCC-PASS-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}> ; CHECK-CGSCC-PASS-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-CGSCC-PASS-NEXT: Running an SCC pass across the RefSCC: [(foo)] -; CHECK-CGSCC-PASS-NEXT: Starting llvm::LazyCallGraph::SCC pass manager run +; CHECK-CGSCC-PASS-NEXT: Starting CGSCC pass manager run ; CHECK-CGSCC-PASS-NEXT: Running pass: NoOpCGSCCPass -; CHECK-CGSCC-PASS-NEXT: Finished llvm::LazyCallGraph::SCC pass manager run +; CHECK-CGSCC-PASS-NEXT: Finished CGSCC pass manager run ; CHECK-CGSCC-PASS-NEXT: Finished llvm::Module pass manager run ; RUN: opt -disable-output -disable-verify -debug-pass-manager \ @@ -134,7 +134,7 @@ ; CHECK-ANALYSES: Starting llvm::Module pass manager run ; CHECK-ANALYSES: Running pass: RequireAnalysisPass ; CHECK-ANALYSES: Running analysis: NoOpModuleAnalysis -; CHECK-ANALYSES: Starting llvm::LazyCallGraph::SCC pass manager run +; CHECK-ANALYSES: Starting CGSCC pass manager run ; CHECK-ANALYSES: Running pass: RequireAnalysisPass ; CHECK-ANALYSES: Running analysis: NoOpCGSCCAnalysis ; CHECK-ANALYSES: Starting llvm::Function pass manager run @@ -235,7 +235,7 @@ ; CHECK-INVALIDATE-ALL-CG: Starting llvm::Module pass manager run ; CHECK-INVALIDATE-ALL-CG: Running pass: RequireAnalysisPass ; CHECK-INVALIDATE-ALL-CG-NOT: Running analysis: NoOpModuleAnalysis -; CHECK-INVALIDATE-ALL-CG: Starting llvm::LazyCallGraph::SCC pass manager run +; CHECK-INVALIDATE-ALL-CG: Starting CGSCC pass manager run ; CHECK-INVALIDATE-ALL-CG: Running pass: RequireAnalysisPass ; CHECK-INVALIDATE-ALL-CG: Running analysis: NoOpCGSCCAnalysis ; CHECK-INVALIDATE-ALL-CG: Starting llvm::Function pass manager run @@ -250,7 +250,7 @@ ; CHECK-INVALIDATE-ALL-CG: Invalidating analysis: NoOpCGSCCAnalysis ; CHECK-INVALIDATE-ALL-CG: Running pass: RequireAnalysisPass ; CHECK-INVALIDATE-ALL-CG: Running analysis: NoOpCGSCCAnalysis -; CHECK-INVALIDATE-ALL-CG: Finished llvm::LazyCallGraph::SCC pass manager run +; CHECK-INVALIDATE-ALL-CG: Finished CGSCC pass manager run ; CHECK-INVALIDATE-ALL-CG-NOT: Invalidating analysis: NoOpCGSCCAnalysis ; CHECK-INVALIDATE-ALL-CG: Invalidating analysis: NoOpModuleAnalysis ; CHECK-INVALIDATE-ALL-CG: Running pass: 
RequireAnalysisPass @@ -381,18 +381,18 @@ ; CHECK-REPEAT-CGSCC-PASS-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}> ; CHECK-REPEAT-CGSCC-PASS-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-REPEAT-CGSCC-PASS-NEXT: Running an SCC pass across the RefSCC: [(foo)] -; CHECK-REPEAT-CGSCC-PASS-NEXT: Starting llvm::LazyCallGraph::SCC pass manager run +; CHECK-REPEAT-CGSCC-PASS-NEXT: Starting CGSCC pass manager run ; CHECK-REPEAT-CGSCC-PASS-NEXT: Running pass: RepeatedPass -; CHECK-REPEAT-CGSCC-PASS-NEXT: Starting llvm::LazyCallGraph::SCC pass manager run +; CHECK-REPEAT-CGSCC-PASS-NEXT: Starting CGSCC pass manager run ; CHECK-REPEAT-CGSCC-PASS-NEXT: Running pass: NoOpCGSCCPass -; CHECK-REPEAT-CGSCC-PASS-NEXT: Finished llvm::LazyCallGraph::SCC pass manager run -; CHECK-REPEAT-CGSCC-PASS-NEXT: Starting llvm::LazyCallGraph::SCC pass manager run +; CHECK-REPEAT-CGSCC-PASS-NEXT: Finished CGSCC pass manager run +; CHECK-REPEAT-CGSCC-PASS-NEXT: Starting CGSCC pass manager run ; CHECK-REPEAT-CGSCC-PASS-NEXT: Running pass: NoOpCGSCCPass -; CHECK-REPEAT-CGSCC-PASS-NEXT: Finished llvm::LazyCallGraph::SCC pass manager run -; CHECK-REPEAT-CGSCC-PASS-NEXT: Starting llvm::LazyCallGraph::SCC pass manager run +; CHECK-REPEAT-CGSCC-PASS-NEXT: Finished CGSCC pass manager run +; CHECK-REPEAT-CGSCC-PASS-NEXT: Starting CGSCC pass manager run ; CHECK-REPEAT-CGSCC-PASS-NEXT: Running pass: NoOpCGSCCPass -; CHECK-REPEAT-CGSCC-PASS-NEXT: Finished llvm::LazyCallGraph::SCC pass manager run -; CHECK-REPEAT-CGSCC-PASS-NEXT: Finished llvm::LazyCallGraph::SCC pass manager run +; CHECK-REPEAT-CGSCC-PASS-NEXT: Finished CGSCC pass manager run +; CHECK-REPEAT-CGSCC-PASS-NEXT: Finished CGSCC pass manager run ; CHECK-REPEAT-CGSCC-PASS-NEXT: Finished llvm::Module pass manager run ; RUN: opt -disable-output -disable-verify -debug-pass-manager \ Index: llvm/trunk/test/Other/pass-pipeline-parsing.ll =================================================================== --- llvm/trunk/test/Other/pass-pipeline-parsing.ll +++ llvm/trunk/test/Other/pass-pipeline-parsing.ll @@ -106,10 +106,10 @@ ; RUN: | FileCheck %s --check-prefix=CHECK-TWO-NOOP-CG ; CHECK-TWO-NOOP-CG: Starting llvm::Module pass manager run ; CHECK-TWO-NOOP-CG: Running pass: ModuleToPostOrderCGSCCPassAdaptor -; CHECK-TWO-NOOP-CG: Starting llvm::LazyCallGraph::SCC pass manager run +; CHECK-TWO-NOOP-CG: Starting CGSCC pass manager run ; CHECK-TWO-NOOP-CG: Running pass: NoOpCGSCCPass ; CHECK-TWO-NOOP-CG: Running pass: NoOpCGSCCPass -; CHECK-TWO-NOOP-CG: Finished llvm::LazyCallGraph::SCC pass manager run +; CHECK-TWO-NOOP-CG: Finished CGSCC pass manager run ; CHECK-TWO-NOOP-CG: Finished llvm::Module pass manager run ; RUN: opt -disable-output -debug-pass-manager \ @@ -122,14 +122,14 @@ ; CHECK-NESTED-MP-CG-FP: Running pass: NoOpFunctionPass ; CHECK-NESTED-MP-CG-FP: Finished llvm::Function pass manager run ; CHECK-NESTED-MP-CG-FP: Running pass: ModuleToPostOrderCGSCCPassAdaptor -; CHECK-NESTED-MP-CG-FP: Starting llvm::LazyCallGraph::SCC pass manager run +; CHECK-NESTED-MP-CG-FP: Starting CGSCC pass manager run ; CHECK-NESTED-MP-CG-FP: Running pass: NoOpCGSCCPass ; CHECK-NESTED-MP-CG-FP: Running pass: CGSCCToFunctionPassAdaptor ; CHECK-NESTED-MP-CG-FP: Starting llvm::Function pass manager run ; CHECK-NESTED-MP-CG-FP: Running pass: NoOpFunctionPass ; CHECK-NESTED-MP-CG-FP: Finished llvm::Function pass manager run ; CHECK-NESTED-MP-CG-FP: Running pass: NoOpCGSCCPass -; CHECK-NESTED-MP-CG-FP: Finished llvm::LazyCallGraph::SCC pass manager run +; CHECK-NESTED-MP-CG-FP: 
Finished CGSCC pass manager run
; CHECK-NESTED-MP-CG-FP: Running pass: ModuleToFunctionPassAdaptor
; CHECK-NESTED-MP-CG-FP: Starting llvm::Function pass manager run
; CHECK-NESTED-MP-CG-FP: Running pass: NoOpFunctionPass
Index: llvm/trunk/unittests/Analysis/CGSCCPassManagerTest.cpp
===================================================================
--- llvm/trunk/unittests/Analysis/CGSCCPassManagerTest.cpp
+++ llvm/trunk/unittests/Analysis/CGSCCPassManagerTest.cpp
@@ -59,7 +59,7 @@
 
   TestSCCAnalysis(int &Runs) : Runs(Runs) {}
 
-  Result run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM) {
+  Result run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &) {
     ++Runs;
     return Result(C.size());
   }
@@ -149,13 +149,14 @@
         AnalyzedModuleFunctionCount(AnalyzedModuleFunctionCount),
         OnlyUseCachedResults(OnlyUseCachedResults) {}
 
-  PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM) {
+  PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
+                        LazyCallGraph &CG, CGSCCUpdateResult &UR) {
     ++RunCount;
 
     const ModuleAnalysisManager &MAM =
-        AM.getResult<ModuleAnalysisManagerCGSCCProxy>(C).getManager();
+        AM.getResult<ModuleAnalysisManagerCGSCCProxy>(C, CG).getManager();
     FunctionAnalysisManager &FAM =
-        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C).getManager();
+        AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
 
     if (TestModuleAnalysis::Result *TMA =
             MAM.getCachedResult<TestModuleAnalysis>(
                *C.begin()->getFunction().getParent()))
@@ -171,7 +172,7 @@
       AnalyzedInstrCount += FAR->InstructionCount;
     } else {
      // Typical path just runs the analysis as needed.
-      TestSCCAnalysis::Result &AR = AM.getResult<TestSCCAnalysis>(C);
+      TestSCCAnalysis::Result &AR = AM.getResult<TestSCCAnalysis>(C, CG);
       AnalyzedSCCFunctionCount += AR.FunctionCount;
       for (LazyCallGraph::Node &N : C) {
        TestFunctionAnalysis::Result &FAR =