Index: include/llvm/ADT/NestedSCC.h =================================================================== --- /dev/null +++ include/llvm/ADT/NestedSCC.h @@ -0,0 +1,439 @@ +//===- ADT/NestedSCC.h - Nested Strongly Connected Comp. -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// facebook T38406375 +/// +/// +/// This builds a set of nested DAG's corresponding to the +/// nested strongly connected components of a graph. This initial implemented +/// only works tested for control flow graphs of a function. +/// +/// The result is a top-level graph-like object each node of which references +/// either a basic block or a nested graph. Edges are placed between two nodes +/// if there is an edge in the underlying graph. Edges are unique even +/// when the underlying graph may have multiple edges. A single basic block +/// make be have a self-edge in the underlying graph and this can be determined +/// by the hasLoop method. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_NESTEDSCC_H +#define LLVM_ADT_NESTEDSCC_H + +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +namespace llvm { +//===----------------------------------------------------------------------===// +// This structure can be used to have a view into a subgraph of some +// other graph. The GT parameter is expected to be a GraphTraits class. +// The Iterator is used to traverse successors (GT child nodes) and uses +// the underling graph iteration which is filtered to ignore nodes not +// in the current subgraph and also to ignore back edges into the entry node. +// This last is motivated by the current use case of finding nested strongly +// connected components. +//===----------------------------------------------------------------------===// +template class SubGraph { +public: + /// A node in the base graph. + using BaseNodeRef = typename GT::NodeRef; + /// A node in the subgraph which is a node in the base graph + /// plus a pointer to this subgraph. + using NodeRef = std::pair; + static BaseNodeRef baseNode(NodeRef N) { return N.first; } + static const SubGraph *subGraph(NodeRef N) { return N.second; } + + /// Construction which takes an iterable collection of base + /// grap nodes and the entry node of the subgraph. + template + SubGraph(const Collection &activeNodes, BaseNodeRef entryNode) + : entryNode(entryNode) { + for (auto N : activeNodes) + active.insert(N); + } + + /// The unique entry node for this subgraph. + NodeRef getEntryNode() const { return {entryNode, this}; } + + /// The set of active base graph nodes for this subgraph. + const DenseSet &getActiveNodes() { return active; } + + class ChildIterator; + +private: + // The collection of nodes which are part of the subgraph. + DenseSet active; + // The designated "entry" node. + BaseNodeRef entryNode; +}; + +/// This iterator returns nodes in the subgraph by filtering +/// the nodes in the base graph via the "active" set. +/// This is used for nested SCC algorithms and so it also +/// filters out all edges into the "entry node" in order +/// to break cycles. +template class SubGraph::ChildIterator { + using BaseIterator = typename GT::ChildIteratorType; + +public: + ChildIterator(NodeRef N) + : source(baseNode(N)), Cur(GT::child_begin(baseNode(N))), + End(GT::child_end(baseNode(N))), G(subGraph(N)) { + if (Cur != End && !validSuccessor(*Cur)) + advance(); + } + ChildIterator(BaseIterator Cur, const SubGraph *G) + : source(nullptr), Cur(Cur), End(Cur), G(G) {} + NodeRef operator*() { + assert(*Cur != G->entryNode); + return {*Cur, G}; + } + bool operator!=(const ChildIterator &It) { return Cur != It.Cur; } + bool operator==(const ChildIterator &It) { return Cur == It.Cur; } + ChildIterator &operator++() { + advance(); + return *this; + } + ChildIterator operator++(int) { + ChildIterator temp(*this); + advance(); + return temp; + } + +private: + bool validSuccessor(BaseNodeRef N) { + return N != G->entryNode && G->active.count(N); + } + void advance() { + if (Cur == End) + return; + Cur++; + while (Cur != End && !validSuccessor(*Cur)) + Cur++; + } + BaseNodeRef source; // TODO(dcallahan) remove this field + BaseIterator Cur, End; + const SubGraph *G; +}; +namespace NestedSCC_ { +struct empty {}; +} // namespace NestedSCC_ + +//===----------------------------------------------------------------------===// +template +class SCCNest; + +/// This is a recursive structure where each instances corresponds to a +/// a function body or a strongly connected component which has been +/// recursively decomposed by deleting "back edges" into a designated +/// entry node. Note that "irreducible" loops may be decomposed into +/// a nest of loops instead of a single natural loop. +/// Base is used as public base class for elements to allow a client to +/// customize with per-component fields. +template +class NestedSCC : public Base { +public: + class Element; + using BaseNodeRef = typename GraphTraits::NodeRef; + using SubGraphT = SubGraph; + using SCCIterT = scc_iterator; + NestedSCC(GraphT G); + NestedSCC(SubGraphT &G, NestedSCC *Parent, unsigned ParentIndex, + GraphT BaseG); + + /// size -- Number of nodes in this strongly connected component + /// where a node may be a base graph node or a nested strongly + /// connected component. + unsigned size() const { return static_cast(SCCs.size()); } + /// The nesting depth of the component. + unsigned getDepth() const { return depth; } + + /// A iterable of all of the nodes in the component. This collection + /// is topologically sorted to respect non-back-edges in the base graph. + const SmallVectorImpl &getSCCs() const { return SCCs; } + + /// Return the i-th node in this component. 0 <= i < size() + const Element &at(unsigned i) const { return SCCs[i]; } + + /// Return the index of an element. + unsigned index(const Element &N) const { + return static_cast(&N - &at(0)); + } + + void dump(raw_ostream &OS, bool all = true); + + using NodeIterator = typename SmallVectorImpl::const_iterator; + NodeIterator node_begin() const { return SCCs.begin(); } + NodeIterator node_end() const { return SCCs.end(); } + + /// Return the immediately containing strongly connected component + /// for this component. This is null for the outermost. + NestedSCC *getParent() const { return Parent; } + + /// Return the index of the node in the parent strongly connected + /// component corresponding to this scc. + unsigned getParentIndex() const { return ParentIndex; } + + class ChildIterator; + +private: + friend class SCCNest; + + // Helper functions which completes building the components + // where the nodes in each componets are stored in this vector. + // Initially these are in reverse toplogical order as that is the + // order the underlying scc_iterator<> produces them. + void build(SmallVectorImpl> &components, + GraphT BaseB); + + // Reference to the associated node in the containing SCC. + NestedSCC *Parent; + unsigned ParentIndex; + + // Nesting depth: zero for the top-most, 1 more than parent + // othersise. + unsigned depth; + + // Components are numbered and stored in the vector SCCs. + // Components are toplogically ordered (ignoring backed edgds + // into the entry node). + SmallVector SCCs; +}; + +// This class is the top-level repesented of the collection of nested +// strongly connected components. It holds a pointer to the top-most (root) +// component and a map from base graph nodes to the innermost component where +// that node appears. +template +class SCCNest { +public: + using NestedSCCT = NestedSCC; + class DepthFirstIterator; + using BaseNodeRef = typename GraphTraits::NodeRef; + using SubGraphT = SubGraph; + using SCCIterT = scc_iterator; + SCCNest(GraphT G); + + NestedSCCT &getRoot() const { return *root; } + + /// A reference to an element in a paricular strongly connected component + /// realized as a pair of the component and the node indexer. + using ComponentRef = std::pair; + static NestedSCCT *component(ComponentRef C) { return C.first; } + static unsigned index(ComponentRef C) { return C.second; } + + /// Defined only for the outer most SCC, map a base graph node to the + /// deepest containing stronglly connected compnent in which it occurs. + ComponentRef getComponent(BaseNodeRef BN) const { + auto iter = map.find(BN); + if (iter == map.end()) + return {nullptr, 0}; + return iter->second; + } + + void dump(raw_ostream &OS) { root->dump(OS, /*all*/ true); } + +private: + // Combine all maps into the top-level map. This should + // only be invoked for the top-level map. + void buildMap(); + + // Top-most strongly-connected component + NestedSCCT *root; + + // map basic blocks to the associated element in + // SCCs which corresponds to the component containing + // the block. After construction this is only valid + // for the top-level strongly connected component. + DenseMap map; +}; + +template +SCCNest::SCCNest(GraphT G) { + root = new NestedSCCT(G); + buildMap(); +} + +/// This class represents a single strongly connected component +/// which might be a singlton node (perhaps a self-loop) or a nested +/// instance. We record as adjacency lists the elements the induced +/// edges from the base graph. +template +class NestedSCC::Element { +public: + Element(const SmallVectorImpl &It, NestedSCC *Parent, + unsigned ParentIndex, GraphT BaseG); + + NestedSCC *getNested() const { return nested.get(); } + BaseNodeRef getBaseNode() const { + assert(!nested); + return leaf; + } + +private: + std::unique_ptr nested; // may be null + BaseNodeRef leaf = nullptr; // defined if nested is null + friend class NestedSCC; +}; + +template +NestedSCC::NestedSCC(GraphT G) + : Parent(nullptr), ParentIndex(0), depth(0) { + + SmallVector, 32> components; + for (scc_iterator It = scc_begin(G); !It.isAtEnd(); ++It) + components.emplace_back((*It).begin(), (*It).end()); + build(components, G); +} + +template +NestedSCC::NestedSCC(SubGraphT &G, NestedSCC *Parent, + unsigned ParentIndex, GraphT BaseG) + : Parent(Parent), ParentIndex(ParentIndex), depth(Parent->depth + 1) { + + SmallVector, 32> components; + for (auto SIt = SCCIterT::begin(G); !SIt.isAtEnd(); ++SIt) { + components.emplace_back(); + for (auto Pair : *SIt) + components.back().push_back(SubGraphT::baseNode(Pair)); + } + build(components, BaseG); +} + +template +void NestedSCC::build( + SmallVectorImpl> &components, GraphT G) { + + // Reverse order of nodes so they are topological + std::reverse(components.begin(), components.end()); + + // Recursively build the nested SCCs + for (unsigned i = 0; i < components.size(); i++) + SCCs.emplace_back(components[i], this, i, G); +} + +template +NestedSCC::Element::Element( + const SmallVectorImpl &Nodes, NestedSCC *Parent, + unsigned ParentIndex, GraphT BaseG) { + if (Nodes.size() == 1) { + leaf = Nodes[0]; + return; + } + SubGraphT subgraph(Nodes, Nodes.back()); + nested.reset(new NestedSCC(subgraph, Parent, ParentIndex, BaseG)); +} + +//===----------------------------------------------------------------------===// +// Iterator and range-iteration support +//===----------------------------------------------------------------------===// +template +iterator_range::NodeIterator> +scc_nodes(NestedSCC &SCCs) { + return {SCCs.node_begin(), SCCs.node_end()}; +} + +template +class SCCNest::DepthFirstIterator { +public: + using NestSCCT = SCCNest::NestedSCCT; + DepthFirstIterator(NestedSCCT &root) : Stack({{&root, 0u}}) {} + DepthFirstIterator() = default; + NestedSCCT &operator*() { return *(Stack.back().first); } + void operator++() { + do { + NestedSCCT *cur; + unsigned nodeIdx; + std::tie(cur, nodeIdx) = Stack.back(); + for (; nodeIdx < cur->size(); nodeIdx++) { + auto &node = cur->at(nodeIdx); + if (node.getNested()) { + Stack.back().second = nodeIdx + 1; + Stack.emplace_back(node.getNested(), 0); + return; + } + } + Stack.pop_back(); + } while (!Stack.empty()); + } + bool operator!=(const DepthFirstIterator &Other) { + return Stack.size() != Other.Stack.size(); + } + +private: + SmallVector, 4> Stack; +}; + +template +typename SCCNest::DepthFirstIterator +scc_df_begin(SCCNest &SCC) { + return typename SCCNest::DepthFirstIterator( + SCC.getRoot()); +} +template +typename SCCNest::DepthFirstIterator +scc_df_end(SCCNest &SCC) { + return typename SCCNest::DepthFirstIterator(); +} +template +iterator_range::DepthFirstIterator> +scc_depth_first(SCCNest &SCC) { + return {scc_df_begin(SCC), scc_df_end(SCC)}; +} + +template +void SCCNest::buildMap() { + for (NestedSCCT &SCC : scc_depth_first(*this)) + for (typename NestedSCCT::Element &E : SCC.SCCs) + if (E.getNested() == nullptr) + map.try_emplace(E.getBaseNode(), ComponentRef{&SCC, SCC.index(E)}); +} + +//===----------------------------------------------------------------------===// +// Additional debugging support +//===----------------------------------------------------------------------===// + +template +void NestedSCC::dump(llvm::raw_ostream &OS, bool all) { + OS << "Nested SCC\n"; + DenseMap number; + unsigned i = 0; + for (NestedSCC &cur : scc_depth_first(*this)) + number[&cur] = i++; + + for (NestedSCC &cur : scc_depth_first(*this)) { + unsigned N = cur.SCCs.size(); + unsigned i = number[&cur]; + for (unsigned j = 0; j < N; j++) { + OS << i << " " << j; + auto &C = cur.at(j); + if (!C.getNested()) { + OS << " " << C.getBaseNode()->getName(); + } else { + auto &E = C.getNested()->at(0); + OS << " {{" << number[C.getNested()] << " " + << E.getBaseNode()->getName() << " }}"; + } + OS << "\n"; + } + if (!all) + break; + } +} + +} // namespace llvm +#endif Index: lib/Transforms/IPO/CMakeLists.txt =================================================================== --- lib/Transforms/IPO/CMakeLists.txt +++ lib/Transforms/IPO/CMakeLists.txt @@ -28,6 +28,7 @@ PartialInlining.cpp PassManagerBuilder.cpp PruneEH.cpp + SampleEdgeWeights.cpp # facebook T38406375 SampleProfile.cpp SCCP.cpp StripDeadPrototypes.cpp Index: lib/Transforms/IPO/SampleEdgeWeights.h =================================================================== --- /dev/null +++ lib/Transforms/IPO/SampleEdgeWeights.h @@ -0,0 +1,29 @@ +//===- SampleEdgeWeights.hp - Determine Edges weights from samples --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// facebook T38406375 +// This file implements the SampleProfileLoader transformation. This pass + +#ifndef LLVM_LIB_TRANSFORMS_IPO_SAMPLEEDGEWEIGHTS_H +#define LLVM_LIB_TRANSFORMS_IPO_SAMPLEEDGEWEIGHTS_H 1 +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/BasicBlock.h" + +namespace llvm { +namespace sampleprof { +using CFGEdge = std::pair; +using EdgeWeightMap = DenseMap; +using BlockWeightMap = DenseMap; + +EdgeWeightMap determineEdgeWeights(Function &F, BlockWeightMap & BlockWeights); +} // namespace sampleprof +} // namespace llvm + +#endif Index: lib/Transforms/IPO/SampleEdgeWeights.cpp =================================================================== --- /dev/null +++ lib/Transforms/IPO/SampleEdgeWeights.cpp @@ -0,0 +1,1464 @@ +//===- SampleEdgeWeights.cpp - Determine Edges weights from samples -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// facebook T38406375 +// See discusstion document: https://fburl.com/n7lu6t5w +// +// This file implements code to propagate information from sampled block +// weights to edge weights. On a DAG, this involves application of the rule that +// sum of weights into an edge equals sum of weights out of an edge equals the +// weight on the block. +// +// This is extended to general graphs by finding nested strongly connected +// components and form a tree of DAGS by collapsing a set of strongly +// connectebuildd nodes to a single synthetic node to determine edge weights on +// the resulting DAG. +// +// When processing a nested graph, we synthesize entry and exit node whose +// weight is inherited from the containing graph. We bias inference to work +// top-down in this set of loops so we have estimates for entry/exit edges from +// containing context rather then inferring them as excess from the loop. +// +// We solve this problem by materializing a set of equations. These equations +// involve variables for block weights, edge weights, and additional special +// conditions involving loops, control flow relationships, and other heuristics. +// We solve these equations to determine values on edges which are returned +// to the caller as a map from edges to weights. + +#include "SampleEdgeWeights.h" +#include "SubsetCFG.h" + +#include "llvm/ADT/NestedSCC.h" +#include "llvm/ADT/PriorityQueue.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/raw_ostream.h" + +#ifndef NDEBUG +#include "llvm/Support/GraphWriter.h" +#include +#include +#endif + +#define DEBUG_TYPE "sample-profile" + +using namespace llvm; +#ifndef NDEBUG +static cl::list DotFuncsList( + "edge-weight-dot-funcs", + cl::value_desc("comma separated list of mangled function names"), + cl::desc("print DOT files for sample edge weights for these functions"), + cl::CommaSeparated, cl::Hidden); +static bool shouldPrintDot(Function &F) { + return any_of(DotFuncsList, + [&F](const std::string &name) { return name == F.getName(); }); +} +#endif + +// Disable the heuristic of computing a maximum possible edge weight +// and using it if it "small enough" +static cl::opt NoMaxEstimate( + "sample-no-max-estimate", cl::init(false), + cl::desc("disable computing a maximum weight from profile data"), + cl::Hidden); + +using CFGEdge = std::pair; +using EdgeWeightMap = DenseMap; +using BlockWeightMap = DenseMap; + +// This specialization of GraphTraits is used to build strongly +// connected components of subgraphs of the control flow graph. +using CFSubGraph = SubGraph>; +namespace llvm { +template <> struct GraphTraits : public GraphTraits { + using NodeRef = CFSubGraph::NodeRef; + static NodeRef getEntryNode(const CFSubGraph &G) { return G.getEntryNode(); } + using ChildIteratorType = CFSubGraph::ChildIterator; + static ChildIteratorType child_begin(NodeRef N) { + return ChildIteratorType(N); + } + static ChildIteratorType child_end(NodeRef N) { + return ChildIteratorType(GraphTraits::child_end(N.first), + N.second); + } +}; +} // namespace llvm +namespace { +const uint64_t MAX_WEIGHT = std::numeric_limits::max(); +const unsigned NO_NODE = std::numeric_limits::max(); +const unsigned MULTIPLE_NODES = NO_NODE - 1; + +// Additional feields which are part of every strongly connected +// component. +struct SCCBase { + uint64_t estimated; + unsigned headNode = NO_NODE; + unsigned exitNode = NO_NODE; +#ifndef NDEBUG + unsigned number; +#endif +}; +} // namespace + +using SCCNestT = SCCNest, + GraphTraits, SCCBase>; +using NestedSCCT = SCCNestT::NestedSCCT; + +namespace { +using Edge = std::pair; +class WeightsDAGPrinter; +/// Subtraction which saturates at 0, used for weight values. +uint64_t difference(uint64_t x, uint64_t y) { return (x > y ? x - y : 0); } +/// Addition which saturates at maximum value +uint64_t sum(uint64_t x, uint64_t y) { + if (x + y < x) + return std::numeric_limits::max(); + return x + y; +} + +// We store names with variables when debugging only +#ifndef NDEBUG +#define NAME(x) x +#define PNAME(x) x, +#else +#define NAME(x) +#define PNAME(x) +#endif + +/// This is a helper class which determines values for missing block weights +/// and fills in EdgeWeights. +class EdgeWeightDeterminator { +public: + using ComponentRef = SCCNestT::ComponentRef; + using DTNode = const DomTreeNodeBase; + + EdgeWeightDeterminator(Function &F, SCCNestT &SCCs, + BlockWeightMap &BlockWeights, SubsetCFG &SG, + const SmallVectorImpl &returnBlocks) + : F(F), SG(SG), DT(SG), PDT(SG), returnBlocks(returnBlocks), SCCs(SCCs), + BlockWeights(BlockWeights) { + for (NestedSCCT &SCC : scc_depth_first(SCCs)) + SCCList.push_back(&SCC); +#ifndef NDEBUG + unsigned count = 0; + for (NestedSCCT *scc : SCCList) + scc->SCCBase::number = count++; +#endif + } + + /// Recurisvely iterate over all SCC's in a top-down order + EdgeWeightMap propagate(); + +private: +#ifndef NDEBUG + static unsigned invocationCount; +#endif + + /// Performan propagation analysis on a single SCC + void propagate(SCCNestT *SCC); + + /// We set SCCbase::estimate to be the largest weight + /// for a node in a loop. Inner loops are ignored unless + /// no node in the loop otherwise has a weight. + void initializeLoopEstimates(); + +#ifndef NDEBUG + /// Print the current graph to a "dot" file + void dot(unsigned pass, unsigned phase); + void dot(NestedSCCT *SCC, unsigned pass, unsigned phase); +#endif + + /// Current unction being analyzed. + Function &F; + /// The subset of the current function being analyzed. This excludes + /// nodes which are not reachable from the entry and from which no + /// return node is reachable. + SubsetCFG &SG; + + /// True if \p BB is in the active subset of F represented + /// by SG. + bool active(BasicBlock *BB) { return SG.getNode(BB); } + + /// The dominator tree of SG + SubsetDomTree DT; + /// The post-dominator tree of SG. + SubsetDomTree PDT; + /// collection of blocks containing a return instructions + const SmallVectorImpl &returnBlocks; + + /// The nested strongly connected components of SG. + SCCNestT &SCCs; + /// Top-down list of the strongly connected components + /// with in SCCs + std::vector SCCList; + /// Input initial block weights which may be updated + /// based on analysis of the control flow graph. + BlockWeightMap &BlockWeights; + Optional getBlockWeight(BasicBlock *BB) { + auto iter = BlockWeights.find(BB); + if (iter == BlockWeights.end()) + return {}; + return iter->second; + } + /// The computed weights on edges + EdgeWeightMap OutputEdgeWeights; + + // Each edge in the underling active control flow graph will generate + // one more equations representing constraints in values of node + // and edge weights. Here we buld those equations. + void induceEquationsFromEdges(BasicBlock *, BasicBlock *); + + // Record a unique entry node into a component + // or MUTIPLE_NODES if the region is irreducible. + void noteHeader(ComponentRef c) { + unsigned &headNode = c.first->SCCBase::headNode; + if (headNode == NO_NODE) + headNode = c.second; + else if (headNode != c.second) + headNode = MULTIPLE_NODES; + } + // Record a unique exit node from a component + // or MUTIPLE_NODES if the are multiple such nodes. + void noteExit(ComponentRef c) { + unsigned &exitNode = c.first->SCCBase::exitNode; + if (exitNode == NO_NODE) + exitNode = c.second; + else if (exitNode != c.second) + exitNode = MULTIPLE_NODES; + } + + /// Identifiy control-equivalent dominator node and add equations which + /// assert the equality of the block variables. + void addControlEquivalenceEquations(BasicBlock *); + + /// Return the most immediate dominator to the given block + /// in the same strongly connected component. + BasicBlock *controlEquivalentDominator(BasicBlock *); + + /// Adjust block weights so that a node X has a weight as large + /// as any node it dominates (or post-dominates) within the same + /// strongly connected component. + void propagateWeightsToDominators(); + template + void propagateWeightsToDominators(SubsetDomTree &DT); + + /// Add an equation that the sum of weights on all returns + /// is the same as the entry way. + void addReturnEquations(); + + /// Apply a heuristic that the loop header node has weight equal + /// to the entry weight and the largest weight in the loop. This is used + /// for simple single-entry, single-exit-at-top loops to account + /// for subsequent loop-rotation. + void loopEstimateEquations(); + + /// Use the set of equations and already set variables to infer values + /// of other variables. + void solveDefined(); + + /// Solve for maximum value of undefined variables. Consider if we + /// have three edges leaving a block with known weight and one edge + /// has a known weight, then we can set a maximum value for the other + /// two edges. + void minimalMax(); + + /// Finalize edge weights from equations for outgoing edges from + /// \p BB and store them in OutputEdgeWeights + void setFinalWeight(BasicBlock *BB); + + /// True if BB->BB is an edge in the active graph. + bool isSelfLoop(BasicBlock * BB) { return isSelfLoopSet.count(BB); } + /// Set of basic blocks which are self loops. + DenseSet isSelfLoopSet; + + // Utility functions for references to nodes in the nest + // of strongly connected components. + ComponentRef null{nullptr, 0}; + static bool isNull(ComponentRef c) { return c.first; } + ComponentRef lookup(BasicBlock *source) { return SCCs.getComponent(source); } + static ComponentRef parent(ComponentRef child) { + return {child.first->getParent(), child.first->getParentIndex()}; + } + static unsigned depth(ComponentRef c) { return c.first->getDepth(); } + + // Two artifical nodes which are not actually in the nest of strongly + // connected components but are used to synthesize variables. + ComponentRef entryNode(NestedSCCT *SCC) { return {SCC, SCC->size()}; } + ComponentRef exitNode(NestedSCCT *SCC) { return {SCC, SCC->size() + 1}; } + +#ifndef NDEBUG + /// Debugging function used name nodes in the nested of + /// strongly connected components. + std::string name(ComponentRef c) { + if (c.second == entryNode(c.first).second) + return ("entry" + Twine(c.first->SCCBase::number)).str(); + if (c.second == exitNode(c.first).second) + return ("exit" + Twine(c.first->SCCBase::number)).str(); + auto &Node = c.first->at(c.second); + if (NestedSCCT *child = Node.getNested()) + return ("scc" + Twine(child->SCCBase::number)).str(); + return Node.getBaseNode()->getName(); + } +#endif + + // Variables and equations are repesented by unsigned integers + // which are indicies into corresponding vectors. + // These classes are simple wrappers around unsigned values for + // stronger typing. + struct VarIndex { + unsigned value; + bool operator!=(VarIndex other) { return value != other.value; } + bool operator==(VarIndex other) { return value == other.value; } + + VarIndex(const VarIndex &v) : value(v.value) {} + VarIndex(unsigned value) : value(value) {} + }; + struct EqIndex { + unsigned value; + }; + + // A variable used in equations + struct Variable { + uint64_t weight = 0; // known weight + SmallVector + uses; // indicies of equations which mention this variable + NAME(std::string name;) // a name used only for debugging. + bool defined = false; // whether we have resolved a value for this variable + bool onQuene = false; // whether this variable is on a queue to be + // have its value propagated. + unsigned depth; // a depth of the variable related to the + // structure of the nested of strongly connected + // components, used to prioritize propagation. + Variable(PNAME(std::string name) unsigned depth) + : PNAME(name(std::move(name))) depth(depth) {} + }; + std::vector variables; + /// Create a new variable with specified name and depth. + VarIndex variable(PNAME(std::string name) unsigned depth) { + VarIndex var(static_cast(variables.size())); + variables.emplace_back(PNAME(std::move(name)) depth); + return var; + } + + // Utility functions for accessing variable state where + // a variable is represented as an index. + bool isDefined(VarIndex v) { return variables[v.value].defined; } + unsigned depth(VarIndex v) { return variables[v.value].depth; } +#ifndef NDEBUG + const std::string &name(VarIndex v) { return variables[v.value].name; } +#endif + uint64_t weight(VarIndex v) { return variables[v.value].weight; } + + void setVariable(VarIndex v, uint64_t w) { + Variable &var = variables[v.value]; + var.defined = true; + var.weight = w; + LLVM_DEBUG(dbgs() << "set " << var.name << " = " << w << "\n";); + } + + /// Compare implements a comparison function used to + /// order variables inside-out and then lower-weights first as + /// a heuritisc for propagation of values. + struct Compare { + std::vector &variables; + bool operator()(VarIndex a, VarIndex b) { + Variable &A = variables[a.value]; + Variable &B = variables[b.value]; + if (A.depth != B.depth) + return A.depth > B.depth; + if (variables[a.value].weight != variables[b.value].weight) + return variables[a.value].weight < variables[b.value].weight; + return a.value < b.value; + } + Compare(std::vector &variables) : variables(variables) {} + }; + + /// An equation represents an assertion that + /// the variable referenced by the lhs has a weight equal to the sum of + /// the values of variables referenced in the rhs vector. + struct Equation { + VarIndex lhs; + SmallVector rhs; + Equation(VarIndex lhs) : lhs(lhs) {} + }; + std::vector equations; + + EqIndex newEquation(VarIndex lhs) { + EqIndex eq{static_cast(equations.size())}; + equations.emplace_back(lhs); + variables[lhs.value].uses.push_back(eq); + return eq; + } + void addRHS(EqIndex eq, VarIndex var) { + equations[eq.value].rhs.push_back(var); + variables[var.value].uses.push_back(eq); + } + + /// Map pairs of component references to a corresponding variable + /// represented by its index. + DenseMap, VarIndex> edgeVariables; + +#ifndef NDEBUG + // debugging aide for printing graphs. + DenseMap> componentSuccessors; + using EdgeIterator = typename SmallVectorImpl::iterator; +#endif + + /// Return the variable corresponding to the edge from \p x to \p y. + /// When this variable is first created, we also add this variable + /// to the equations for output for \p x and the inputs of \p y. + VarIndex getEdgeVariable(ComponentRef x, ComponentRef y) { + auto pair = edgeVariables.try_emplace({x, y}, 0); + if (pair.second) { + VarIndex var = variable(PNAME(name(x) + "->" + name(y)) depth(x)); + pair.first->getSecond() = var; + addRHS(getNodeEquations(x).outputs, var); + addRHS(getNodeEquations(y).inputs, var); +#ifndef NDEBUG + componentSuccessors[x].push_back(y); +#endif + } + return pair.first->getSecond(); + } + + /// Information about equations which correspond to an edge incident + /// to a nested strongly connected component. + /// These come in pairs with the same variable as lhs on both. + /// They represent an assertion that the sum of edge weights into an SCC + /// equal the sum of edge weights out of that SCC. + struct EdgeEquations { + EqIndex source; + EqIndex sink; + }; + VarIndex variable(EdgeEquations eq) { return equations[eq.source.value].lhs; } + + /// Map from an edge variable index to the corresponding edge equations + DenseMap edgeEquations; + + /// \p e is the variable corresponding to an edge where at least one + /// endpoint is incident to a child SCC. Here we create equation placeholders + /// for the inputs or outputs of that child SCC whose sum should equal + /// the value on the edge. + EdgeEquations getEdgeEquations(VarIndex e) { + auto pair = edgeEquations.try_emplace(e.value, EdgeEquations{{0}, {0}}); + if (pair.second) { + VarIndex v = variable(PNAME(("edge" + Twine(e.value)).str()) depth(e)); + pair.first->getSecond() = EdgeEquations{newEquation(v), newEquation(v)}; + addRHS(newEquation(e), v); + } + return pair.first->getSecond(); + } + + /// For each node there are two equations which represent sum of + /// inputs and sum of output weights. This equations have the same + /// variable as lhs which is the variable associated with the node. + struct NodeEquations { + EqIndex inputs; + EqIndex outputs; + }; + DenseMap nodeVariables; + + /// Create a variable for a reference to a component node which may + /// be an artifical entry or exit node. + NodeEquations getNodeEquations(ComponentRef x) { + auto pair = nodeVariables.try_emplace(x, NodeEquations{{0}, {0}}); + if (pair.second) { + VarIndex v = variable(PNAME(name(x)) depth(x)); + pair.first->getSecond() = NodeEquations{newEquation(v), newEquation(v)}; + } + return pair.first->getSecond(); + } + + // Utility functions to access variables + VarIndex variable(NodeEquations eq) { return equations[eq.inputs.value].lhs; } + VarIndex variable(ComponentRef c) { return variable(getNodeEquations(c)); } + + /// Return the variable associated with control flow edges + /// from source to sink. We have one variable even when there might be + /// multiple such edges in the control flow graph for F. + Variable &getEdgeVariable(BasicBlock *source, BasicBlock *sink); + +#ifndef NDEBUG + // Debugging functions + void dumpVariable(VarIndex v) { + Variable &var = variables[v.value]; + dbgs() << var.name; + if (var.defined) + dbgs() << "=" << var.weight; + else if (var.weight != MAX_WEIGHT) + dbgs() << "<" << var.weight; + } + void dumpEquation(EqIndex e) { dumpEquation(equations[e.value]); } + void dumpEquation(Equation &eq) { + dumpVariable(eq.lhs); + const char *op = " == "; + for (VarIndex rhs : eq.rhs) { + dbgs() << op; + dumpVariable(rhs); + op = " + "; + } + } + // Dump all equations + void dumpEquations(const char *title) { + dbgs() << "\n\n" << title << "\n"; + for (unsigned eq = 0; eq < equations.size(); eq++) { + if (equations[eq].rhs.empty()) + continue; + dbgs() << "eq " << eq << ": "; + dumpEquation(EqIndex{eq}); + dbgs() << "\n"; + } + } +#endif + + /// Get weight of a variable if it is defined. + Optional getWeight(VarIndex v) { + Variable &var = variables[v.value]; + if (var.defined) + return var.weight; + return {}; + } + + using ApproxWeight = std::pair; + + /// Get weight and a flag to indicate that it is defined or not. + ApproxWeight getApproxWeight(VarIndex v) { + Variable &var = variables[v.value]; + return {var.weight, var.defined}; + } +#ifndef NDEBUG + /// Declaration used to access information from the graph writing + /// debug classes. + ApproxWeight getApproxWeight(NestedSCCT *SCC, unsigned node) { + return getApproxWeight(variable(ComponentRef{SCC, node})); + } + ApproxWeight getApproxWeight(NestedSCCT *SCC, unsigned x, unsigned y) { + return getApproxWeight( + getEdgeVariable(ComponentRef{SCC, x}, ComponentRef{SCC, y})); + } + friend class WeightsDAGPrinter; + friend struct GraphTraits; + friend struct DOTGraphTraits; +#endif +}; +} // namespace + +#ifndef NDEBUG +unsigned EdgeWeightDeterminator::invocationCount = 0; +#endif + +EdgeWeightMap EdgeWeightDeterminator::propagate() { + LLVM_DEBUG({ + unsigned count = 0; + for (BasicBlock &BB : F) + if (!BB.hasName() || BB.getName().size() < 2) + BB.setName(("bb" + Twine(count++)).str()); + }); +#ifndef NDEBUG + bool shouldDot = shouldPrintDot(F); + // this variable is used in the name of ".dot" files written + // for debugging. +#endif + initializeLoopEstimates(); + propagateWeightsToDominators(); + for (BasicBlock &BB : F) { + if (!active(&BB)) + continue; + for (BasicBlock *succ : successors(&BB)) + if (active(succ)) + induceEquationsFromEdges(&BB, succ); + addControlEquivalenceEquations(&BB); + } + addReturnEquations(); +#ifndef NDEBUG + unsigned pass = 0; + if (shouldDot) { + dumpEquations("initial"); + dot(invocationCount, pass++); + } +#endif + // Where there is a defined block weight, set the corresponding + // variables values. + for (auto pair : BlockWeights) { + BasicBlock *BB = const_cast(pair.getFirst()); + ComponentRef c = lookup(BB); + if (!c.first) + continue; + if (c.second == 0 && depth(c) > 0) + // Ok early we have a loop which looks like + // while(x) y; + // later (after loop rotation) we will have + // if (x) do y while(x); + // the true execution count for "x" is the sum of the two 'x' counts + // but instead we get the max in the profile. So, igore this count + // and infer it from surrounding information. This is import to get + // loop trip count estimates correct. + continue; + if (isSelfLoop(BB)) + // The block weight on single-node loop does not influence + // other blocks. + continue; + setVariable(variable(getNodeEquations(c)), pair.getSecond()); + } + loopEstimateEquations(); + +#ifndef NDEBUG + if (shouldDot) { + dumpEquations("weights"); + dot(invocationCount, pass++); + } +#endif + solveDefined(); +#ifndef NDEBUG + if (shouldDot) { + dumpEquations("solved"); + dot(invocationCount, pass++); + } +#endif + if (!NoMaxEstimate) { + minimalMax(); +#ifndef NDEBUG + if (shouldDot) { + dumpEquations("final"); + dot(invocationCount, pass++); + } +#endif + } + /// Set values in OutputEdgeWeights + for (BasicBlock &BB : F) { + // unreachable block? + if (!active(&BB)) + continue; + setFinalWeight(&BB); + } +#ifndef NDEBUG + if (shouldDot) + invocationCount += 1; +#endif + return std::move(OutputEdgeWeights); +} + +void EdgeWeightDeterminator::loopEstimateEquations() { + + // Apply a heuristic that the loop header node has weight equal + // to the entry weight and the largest weight in the loop. This is used + // for simple single-entry, single-exit-at-top loops to account + // for subsequent loop-rotation. + // TODO -- check for single node loops + // and simulate a loop body node in some way. + for (NestedSCCT *C : SCCList) { + + // multiple entry nodes into loop? + if (C->SCCBase::headNode == MULTIPLE_NODES) + continue; + // A simple exit-at-top loop? + if (C->SCCBase::exitNode != C->SCCBase::headNode) + continue; + + // No real weight in this loop? then for all nodes + // in the loop to be zero. + if (C->SCCBase::estimated < 10000) { + if (C->SCCBase::estimated > 0) + continue; + for (const NestedSCCT::Element &E : scc_nodes(*C)) + if (!E.getNested() && C->index(E) != C->SCCBase::headNode) + setVariable(variable(ComponentRef{C, C->index(E)}), 0); + continue; + } + + // loop head is always first node in a component; + VarIndex headVar = variable(ComponentRef{C, 0}); + if (isDefined(headVar)) { + LLVM_DEBUG(dbgs() << "header " << name(headVar) + << " set, igoring estimate\n";); + continue; + } + LLVM_DEBUG(dbgs() << "header " << name(headVar) << " estimate as " + << C->SCCBase::estimated << "\n";); + EqIndex eq = newEquation(headVar); + addRHS(eq, variable(entryNode(C))); + VarIndex estimate = variable(PNAME( + ("estimate" + Twine(C->SCCBase::number).str())) depth(C->getDepth())); + addRHS(eq, estimate); + setVariable(estimate, C->SCCBase::estimated); + } +} + +void EdgeWeightDeterminator::minimalMax() { + + // Solve for maximum value of undefined variables. Consider if we + // have three edges leaving a block with known weight and one edge + // has a known weight, then we can set a maximum value for the other + // two edges. + + // We prioritize inside-out and small-weights first as that prevents + // small values from being lost in saturated arithmetic between large values. + PriorityQueue, Compare> pqueue( + (Compare(variables))); + + // initialize undefined vairables to max value. + for (Variable &V : variables) { + if (!V.defined) + V.weight = MAX_WEIGHT; + else { + unsigned idx = &V - &variables[0]; + V.onQuene = true; + pqueue.push(VarIndex{idx}); + } + } + + // Set a new bound on a variable. Add it to pqueue if it + // changes enough. + auto setVarBound = [this, &pqueue](VarIndex idx, uint64_t w) { + Variable &V = variables[idx.value]; + if (V.defined || V.weight <= w) + return; + uint64_t old = V.weight; + LLVM_DEBUG(dbgs() << "bound " << V.name << " " << w << " was " << old + << "\n";); + V.weight = w; + // We ignore small changes in values to accelerate termination. + if (w * 1.3 < old * 1.0 && !V.onQuene) { + pqueue.push(idx); + V.onQuene = true; + } + }; + + // Evaluate an equation to look for new bounds on variables + // mentioned in that equations. + auto eval = [this, setVarBound](Equation &E) { + if (E.rhs.empty()) + return; + + // How much weight is not accounted for? + uint64_t available = getApproxWeight(E.lhs).first; + for (VarIndex V : E.rhs) + if (Optional W = getWeight(V)) + available = difference(available, *W); + + // Update bonds on variables, compute new bound on lhs. + uint64_t rhs_max = 0; + for (VarIndex V : E.rhs) { + setVarBound(V, available); + rhs_max = sum(rhs_max, variables[V.value].weight); + } + setVarBound(E.lhs, rhs_max); + }; + + // Propagate changes in vairable bounds until the system + // stabilizes (within the change tolerance mentioned above). + while (!pqueue.empty()) { + VarIndex v = pqueue.top(); + pqueue.pop(); + variables[v.value].onQuene = false; + + LLVM_DEBUG(dbgs() << "pushing max " << variables[v.value].name << " depth " + << depth(v) << " weight " << weight(v) << "\n";); + // Try to evaluate each equation using v + for (EqIndex eqIdx : variables[v.value].uses) { + Equation &eq = equations[eqIdx.value]; + LLVM_DEBUG({ + dbgs() << "checking " << eqIdx.value << ": "; + dumpEquation(eqIdx); + dbgs() << "\n"; + }); + eval(eq); + } + } +} + +void EdgeWeightDeterminator::setFinalWeight(BasicBlock *source) { + + // Set values in OutputEdgeWeights + // This may be from variables on edges if they are defined. + + SmallVector, 16> weights; + uint64_t known = 0; + uint64_t other = 0; + for (BasicBlock *succ : successors(source)) { + if (!active(succ)) + continue; + + Variable &V = getEdgeVariable(source, succ); + if (V.defined) + known += V.weight; + else + other = sum(other, V.weight); + weights.emplace_back(succ, V.weight); + } + + // If we have unknown weight that a signigicant fraction + // of known weights, then don't set any weights for this node. + if (other && (NoMaxEstimate || 10 * other >= known)) + return; + // Otherwise we implicit promote the unknown edges to have a + // weight equal to their max. + LLVM_DEBUG(for (BasicBlock *succ + : successors(source)) { + if (!active(succ)) + continue; + + Variable &V = getEdgeVariable(source, succ); + if (V.defined) + continue; + dbgs() << "promoting bound " << source->getName() << "->" << succ->getName() + << " = " << V.weight << "\n"; + }); + for (auto &pair : weights) { + LLVM_DEBUG(dbgs() << "final " << source->getName() << "->" + << pair.first->getName() << " " << pair.second << "\n";); + OutputEdgeWeights.try_emplace(CFGEdge{source, pair.first}, pair.second); + } +} + +EdgeWeightDeterminator::Variable & +EdgeWeightDeterminator::getEdgeVariable(BasicBlock *source, BasicBlock *sink) { + + ComponentRef sourceC = lookup(source); + assert(sourceC.first); + ComponentRef sinkC = lookup(sink); + assert(sinkC.first); + if (sourceC.first != sinkC.first) { + unsigned sourceDepth = depth(sourceC); + unsigned sinkDepth = depth(sinkC); + for (; sinkDepth > sourceDepth; sinkDepth--) + sinkC = parent(sinkC); + if (sourceC.first != sinkC.first) { + // ok we have an exit edge and need to find the local + // edge that corresponds to it. + sinkC = exitNode(sourceC.first); + } + } + return variables[getEdgeVariable(sourceC, sinkC).value]; +} + +void EdgeWeightDeterminator::solveDefined() { + + auto setVar = [this](VarIndex v, uint64_t w) { + Variable &var = variables[v.value]; + if (var.defined) + return false; + var.defined = true; + var.weight = w; + LLVM_DEBUG(dbgs() << "set " << var.name << " to " << w << "\n"); + return true; + }; + + auto varWeight = [this](VarIndex var) { return variables[var.value].weight; }; + + PriorityQueue, Compare> pqueue( + (Compare(variables))); + for (unsigned idx = 0; idx < variables.size(); idx++) + if (isDefined(VarIndex{idx})) + pqueue.push(VarIndex{idx}); + + VarIndex null{std::numeric_limits::max()}; + VarIndex undefined{null.value - 1}; + + while (!pqueue.empty()) { + VarIndex v = pqueue.top(); + pqueue.pop(); + LLVM_DEBUG(dbgs() << "pushing " << variables[v.value].name << " depth " + << depth(v) << " weight " << weight(v) << "\n";); + + // Try to evaluate each equation using v + for (EqIndex eqIdx : variables[v.value].uses) { + Equation &eq = equations[eqIdx.value]; + LLVM_DEBUG({ + dbgs() << "checking " << eqIdx.value << ": "; + dumpEquation(eqIdx); + dbgs() << "\n"; + }); + + // see if the equation has exactly one unknown variable. + VarIndex unknown = (isDefined(eq.lhs) ? null : eq.lhs); + uint64_t sum = 0; // accumulate known rhs's + for (VarIndex rhs : eq.rhs) { + if (isDefined(rhs)) { + sum += varWeight(rhs); + continue; + } + if (unknown != null) { // two or more unknowns, give up + unknown = undefined; + break; + } + unknown = rhs; + } + if (unknown == undefined || unknown == null) + continue; + // if the unknown is a rhs variable, compute its value + // otherwise sum is the value of the lhs variable. + if (unknown != eq.lhs) + sum = difference(varWeight(eq.lhs), sum); + setVar(unknown, sum); + pqueue.push(unknown); + } + } +} + +void EdgeWeightDeterminator::induceEquationsFromEdges(BasicBlock *source, + BasicBlock *sink) { + if (source == sink) { + assert(isSelfLoop(source)); + return ; + } + ComponentRef sourceC = lookup(source); + ComponentRef sinkC = lookup(sink); + + // Walk up component trees recording nodes + // until we find a common ancestor component. + SmallVector Exits; + SmallVector Entries; + if (sourceC.first != sinkC.first) { + unsigned sourceDepth = depth(sourceC); + unsigned sinkDepth = depth(sinkC); + for (; sourceDepth > sinkDepth; --sourceDepth) { + noteExit(sourceC); + Exits.push_back(sourceC); + sourceC = parent(sourceC); + } + for (; sinkDepth > sourceDepth; --sinkDepth) { + noteHeader(sinkC); + Entries.push_back(sinkC); + sinkC = parent(sinkC); + } + while (sourceC.first != sinkC.first) { + noteExit(sourceC); + noteHeader(sinkC); + Exits.push_back(sourceC); + sourceC = parent(sourceC); + Entries.push_back(sinkC); + sinkC = parent(sinkC); + } + } + VarIndex edgeVar = getEdgeVariable(sourceC, sinkC); + VarIndex sourceEdge = edgeVar; + for (ComponentRef child : reverse(Exits)) { + VarIndex exitEdge = getEdgeVariable(child, exitNode(child.first)); + addRHS(getEdgeEquations(sourceEdge).source, exitEdge); + sourceEdge = exitEdge; + } + VarIndex sinkEdge = edgeVar; + for (ComponentRef child : reverse(Entries)) { + VarIndex entryEdge = getEdgeVariable(entryNode(child.first), child); + addRHS(getEdgeEquations(sinkEdge).sink, entryEdge); + sinkEdge = entryEdge; + } +} + +template +void EdgeWeightDeterminator::propagateWeightsToDominators( + SubsetDomTree &DT) { + + DTNode *Root = DT.getRootNode(); + + // Find the most immediate dominator of DN which + // is in the same stronglly connected component. + auto getDominator = [this](DTNode *DN) -> BasicBlock * { + if (isSelfLoop(DN->getBlock()->BB)) + return nullptr; + ComponentRef CN = lookup(DN->getBlock()->BB); + for (;;) { + DTNode *IDom = DN->getIDom(); + if (!IDom || !IDom->getBlock()) + return nullptr; + BasicBlock *BB = IDom->getBlock()->BB; + ComponentRef ICN = lookup(BB); + if (depth(ICN) < depth(CN)) + return nullptr; + if (depth(ICN) == depth(CN)) + return BB; + DN = IDom; + } + }; + + using iterator = DTNode::const_iterator; + SmallVector, 32> stack; + stack.emplace_back(Root, Root->getChildren().begin()); + while (!stack.empty()) { + DTNode *DN = stack.back().first; + iterator &it = stack.back().second; + // Push next child on the stack + if (it != DN->getChildren().end()) { + DTNode *child = *it++; + stack.emplace_back(child, child->getChildren().begin()); + continue; + } + + // ok, done with this subtree + stack.pop_back(); + + // add this weight to closest dominator in same component + if (!DN->getBlock()) + continue; + BasicBlock *BB = DN->getBlock()->BB; + if (Optional BBW = getBlockWeight(BB)) + if (BasicBlock *Dom = getDominator(DN)) { + uint64_t &W = BlockWeights[Dom]; + if (*BBW > W) { + W = *BBW; + LLVM_DEBUG(dbgs() << (IsPDom ? "pdom " : "dom ") << Dom->getName() + << " of " << BB->getName() << " new weight " << W + << "\n";); + } + } + } +} + +void EdgeWeightDeterminator::propagateWeightsToDominators() { + propagateWeightsToDominators(DT); + propagateWeightsToDominators(PDT); +} + +void EdgeWeightDeterminator::addControlEquivalenceEquations(BasicBlock *BB) { + if (BasicBlock *dom = controlEquivalentDominator(BB)) + addRHS(newEquation(variable(lookup(dom))), variable(lookup(BB))); +} + +BasicBlock *EdgeWeightDeterminator::controlEquivalentDominator(BasicBlock *BB) { + + if (isSelfLoop(BB)) + return nullptr; + ComponentRef c = lookup(BB); + + // Walk up the dominator tree looking at + // control equivalent dominators until we find one + // in the same component. Quit if we find one that is + // not equivalent or in an outer component. + DTNode *DN = DT.getNodeFromBlock(BB); + while (DN != DT.getRootNode()) { + DTNode *IDom = DN->getIDom(); + BasicBlock *dom = IDom->getBlock()->BB; + if (!PDT.dominates(BB, dom)) + break; + + ComponentRef d = lookup(dom); + if (d.first == c.first) + return dom; + if (depth(d) < depth(c)) + break; + DN = IDom; + } + return nullptr; +} + +void EdgeWeightDeterminator::addReturnEquations() { + + VarIndex entry = variable(lookup(&F.getEntryBlock())); + EqIndex eq = newEquation(entry); + for (BasicBlock *R : returnBlocks) + if (active(R)) + addRHS(eq, variable(lookup(R))); + + // TODO -- this should not be needed since it should be + // enforced at the containing level -- check it. + for (NestedSCCT *SCC : SCCList) + if (SCC->getDepth() > 0) + addRHS(newEquation(variable(entryNode(SCC))), variable(exitNode(SCC))); +} + +void EdgeWeightDeterminator::initializeLoopEstimates() { + + for (NestedSCCT *region : reverse(SCCList)) { + uint64_t local = 0; + uint64_t nested = 0; + for (const NestedSCCT::Element &N : scc_nodes(*region)) { + if (NestedSCCT *child = N.getNested()) { + nested = std::max(nested, child->SCCBase::estimated); + continue; + } + BasicBlock * BB = N.getBaseNode(); + bool selfLoop = any_of(successors(BB), + [BB](BasicBlock * Succ) { return BB==Succ; }); + if (selfLoop) { + isSelfLoopSet.insert(BB); + LLVM_DEBUG(dbgs() << "self loop: " + << " " << BB->getName() + << "\n"); + } + auto iter = BlockWeights.find(BB); + if (iter == BlockWeights.end()) + continue; + if (selfLoop) { + nested = std::max(nested, iter->getSecond()); + continue; + } + // TODO, should we skip the unique entry node? due to the loop + // rotation problem? + local = std::max(local, iter->getSecond()); + } + region->SCCBase::estimated = (local > 0 ? local : nested); + LLVM_DEBUG(dbgs() << "estimated " << region->SCCBase::number << " = " + << region->SCCBase::estimated << "\n";); + } +} + +static DenseSet +getReachesReturn(Function &F, const SmallVectorImpl &returns, + const SCCNestT &SCCs) { + DenseSet reachesReturn; + SmallVector queue(returns.begin(), returns.end()); + for (BasicBlock *BB : returns) + if (SCCs.getComponent(BB).first) // reachable from entry? + reachesReturn.insert(BB); + + while (!queue.empty()) { + BasicBlock *BB = queue.pop_back_val(); + for (BasicBlock *pred : predecessors(BB)) + if (SCCs.getComponent(pred).first && reachesReturn.insert(pred).second) + queue.push_back(pred); + } + return reachesReturn; +} + +EdgeWeightMap llvm::sampleprof::determineEdgeWeights(Function &F, + BlockWeightMap & BlockWeights) { + + LLVM_DEBUG(dbgs() << "Determine edge weights: " << F.getName() << "\n";); + + // Find all return points + SmallVector returnBlocks; + for (BasicBlock &BB : F) + if (isa(BB.getTerminator())) + returnBlocks.push_back(&BB); + + // If a function has an unconditional throw() it will not have a return + if (returnBlocks.empty()) { + LLVM_DEBUG(dbgs() << "no normal return found\n";); + return EdgeWeightMap(); + } + + SCCNestT SCCs(&F); + DenseSet reachesReturn = + getReachesReturn(F, returnBlocks, SCCs); + assert(!reachesReturn.empty()); + + // The normal subgraph excludes unreachables nodes and nodes which + // don't have a path to a return. This is used to make control information + // such as dominators more accurate. We tree blocks outside of this subgraph + // as never executed. + SubsetCFG NormalSubgraph(F, reachesReturn); + + EdgeWeightDeterminator solver(F, SCCs, BlockWeights, NormalSubgraph, + returnBlocks); + EdgeWeightMap EdgeWeights = solver.propagate(); + return EdgeWeights; +} + +//===----------------------------------------------------------------------===// +// +// Following type and rourintes support printing annotated DOT files +// +//===----------------------------------------------------------------------===// +#ifndef NDEBUG +namespace { +// This class is only used as a debugging aide to proint out SCC DAG's in DOT +// format The graph printer requires that node references be pointers. Here we +// create a vector of pairs of node numbers and pointers back to this class. +// Each element in this vector corresponds to a "node" in the dag and we use the +// addresses of elements as node references for the purpose of printing graphs. +class WeightsDAGPrinter { +public: + WeightsDAGPrinter(EdgeWeightDeterminator *SW, NestedSCCT *SCC); + using ComponentRef = EdgeWeightDeterminator::ComponentRef; + +private: + // Local per-node information used for printing. + struct NodeRef { + unsigned Node; + WeightsDAGPrinter *Printer; + int64_t inError; + int64_t outError; + bool hasIn; + bool hasOut; + bool hasError; + bool active; + }; + + friend struct GraphTraits; + friend struct DOTGraphTraits; + SmallVector Nodes; + EdgeWeightDeterminator *EWD; + NestedSCCT *SCC; + + struct nodes_iterator; + struct ChildIteratorType; +}; +} // namespace + +WeightsDAGPrinter::WeightsDAGPrinter(EdgeWeightDeterminator *SW, + NestedSCCT *SCC) + : EWD(SW), SCC(SCC) { + + using ComponentRef = EdgeWeightDeterminator::ComponentRef; + using VarIndex = EdgeWeightDeterminator::VarIndex; + unsigned N = SCC->size() + (SCC->getDepth() ? 2 : 0); + auto active = [SCC, SW](unsigned i) -> bool { + if (i >= SCC->size()) + return true; + if (SCC->at(i).getNested()) + return true; + return (SW->SG.getBlocks().count(SCC->at(i).getBaseNode())); + }; + + for (unsigned i = 0; i < N; i++) { + int64_t w = 0; + bool isActive = active(i); + if (isActive) { + VarIndex v = EWD->variable(ComponentRef{SCC, i}); + w = EWD->variables[v.value].weight; + } + Nodes.push_back({i, this, w, w, false, false, false, isActive}); + } +} +// An iterator over the nodes in a EdgeWeightDeterminator indirected +// through a WeightsDAGPrinter instance. +struct WeightsDAGPrinter::nodes_iterator { + WeightsDAGPrinter *G; + unsigned cur; + NodeRef *operator*() { + // A NodeRef must be a pointer so we return the address + // of what is essentially an iota vector in the G. The first + // element of this vector is equal to Cur and the second equal to G. + // G->Nodes[Cur] == std::make_pair(Cur,G); + return &G->Nodes[cur]; + } + void operator++() { + do { + ++cur; + } while (cur < G->Nodes.size() && !G->Nodes[cur].active); + } + bool operator!=(const nodes_iterator &Other) const { + return cur != Other.cur; + } +}; + +/// This iterator wrapps an edge iterator from the determinator class +/// and translate node indices into suitable NodeRef's whuch are +/// addresses of elements in G->Nodes. +struct WeightsDAGPrinter::ChildIteratorType { + WeightsDAGPrinter *G; + using SuccIterator = EdgeWeightDeterminator::EdgeIterator; + SuccIterator Cur; + NodeRef *operator*() { return &G->Nodes[Cur->second]; } + void operator++() { Cur++; } + bool operator!=(const ChildIteratorType &Other) { return Cur != Other.Cur; } + unsigned operator-(const ChildIteratorType &Other) { return Cur - Other.Cur; } +}; + +namespace llvm { +/// This class corresponds to the static parameters needef for GraphWriter +template <> struct GraphTraits { + using NodeRef = WeightsDAGPrinter::NodeRef *; + // This simply wraps a counter of node numbers and converts it + // into a suitable pointer. + using nodes_iterator = WeightsDAGPrinter::nodes_iterator; + static nodes_iterator nodes_begin(WeightsDAGPrinter *G) { return {G, 0}; } + static nodes_iterator nodes_end(WeightsDAGPrinter *G) { + return {G, static_cast(G->Nodes.size())}; + } + using ChildIteratorType = WeightsDAGPrinter::ChildIteratorType; + static ChildIteratorType child_begin(NodeRef N) { + WeightsDAGPrinter *SW = N->Printer; + WeightsDAGPrinter::ComponentRef ref{SW->SCC, N->Node}; + return {SW, SW->EWD->componentSuccessors[ref].begin()}; + } + static ChildIteratorType child_end(NodeRef N) { + WeightsDAGPrinter *SW = N->Printer; + WeightsDAGPrinter::ComponentRef ref{SW->SCC, N->Node}; + return {SW, SW->EWD->componentSuccessors[ref].end()}; + } +}; + +/// More static parameters for GraphWriter for the case of creating DOT files. +template <> +struct DOTGraphTraits : public DefaultDOTGraphTraits { + + using NodeRef = WeightsDAGPrinter::NodeRef *; + using ChildIteratorType = GraphTraits::ChildIteratorType; + using ApproxWeight = EdgeWeightDeterminator::ApproxWeight; + + DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(WeightsDAGPrinter *G) { + NestedSCCT *SCC = G->SCC; + NestedSCCT *parent = SCC->getParent(); + if (!parent) + return ""; + return ("Node " + Twine(SCC->getParentIndex()) + " in scc " + + Twine(parent->SCCBase::number) + " base " + + parent->at(0).getBaseNode()->getName()) + .str(); + } + static std::string getNodeAttributes(NodeRef N, WeightsDAGPrinter *) { + NestedSCCT *SCC = N->Printer->SCC; + if (N->Node >= SCC->size()) + return "shape=ellipse"; + if (N->hasError) + return "color=red"; + auto & E = SCC->at(N->Node); + if (E.getNested()) + return "color=green"; + if (N->Printer->EWD->isSelfLoop(E.getBaseNode())) + return "color=green"; + return ""; + } + std::string getNodeLabel(NodeRef N, WeightsDAGPrinter *) { + NestedSCCT *SCC = N->Printer->SCC; + if (N->Node == SCC->size()) + return "entry"; + if (N->Node == SCC->size() + 1) + return "exit"; + + auto &Node = SCC->at(N->Node); + if (NestedSCCT *child = Node.getNested()) { + unsigned index = child->SCCBase::number; + auto &Entry = Node.getNested()->at(0); + if (Entry.getNested()) { + return ("{" + Twine(N->Node) + " -> " + Twine(index) + "}").str(); + } + return ("{" + Entry.getBaseNode()->getName() + ":" + Twine(N->Node) + + " -> " + Twine(index) + "}") + .str(); + } + return (Node.getBaseNode()->getName() + (":" + Twine(N->Node))).str(); + } + + static std::string commas(int64_t x) { + std::string xs = Twine(x).str(); + int n = xs.length(); + std::string result; + auto cur = xs.begin(); + if (*cur == '-') { + result += *cur++; + n -= 1; + } + for (;;) { + result += *cur++; + n -= 1; + if (n == 0) + return result; + if (n % 3 == 0) + result += ','; + } + } + + static std::string getNodeDescription(NodeRef N, WeightsDAGPrinter *) { + NestedSCCT *SCC = N->Printer->SCC; + ApproxWeight W = N->Printer->EWD->getApproxWeight(SCC, N->Node); + + const char *terminus = ""; + const char *selfLoop = ""; + Optional estimated; + Optional profiled; + if (N->Node < SCC->size()) { + auto &Node = SCC->at(N->Node); + if (!Node.getNested()) { + BasicBlock * BB = Node.getBaseNode(); + profiled = N->Printer->EWD->getBlockWeight(BB); + const Instruction *term = BB->getTerminator(); + if (isa(term)) + terminus = "\\|return"; + else if (isa(term)) + terminus = "\\|unreachable"; + else if (isa(term)) + terminus = "\\|resume"; + if (N->Printer->EWD->isSelfLoop(BB)) + selfLoop = "\\|self loop"; + } else { + estimated = Node.getNested()->SCCBase::estimated; + } + } + + std::string weight = "none"; + if (profiled) + weight = ("*" + Twine(commas(profiled.getValue()))).str(); + else if (W.first < MAX_WEIGHT) + weight = ((W.second ? "" : "<") + Twine(commas(W.first))).str(); + + Optional entryWeight = + N->Printer->EWD->getBlockWeight(&N->Printer->EWD->F.getEntryBlock()); + std::string freq = ""; + if (entryWeight && profiled) { + std::stringstream FS; + FS << "\\|f=" << + float(profiled.getValue())/float(entryWeight.getValue()); + freq = FS.str(); + } + Twine Result = + Twine(weight) + + (estimated ? "\\|e=" + Twine(commas(*estimated)) : Twine("")) + + (profiled && W.first != profiled.getValue() + ? "\\|c=" + Twine(commas(W.first)) : Twine("")) + + freq + selfLoop + terminus; + return Result.str(); + } + + /// Display the raw branch weights from PGO. + std::string getEdgeAttributes(NodeRef source, ChildIteratorType &I, + WeightsDAGPrinter *) { + NodeRef sink = *I; + unsigned sourceIdx = source->Node; + unsigned sinkIdx = sink->Node; + ApproxWeight W = source->Printer->EWD->getApproxWeight(source->Printer->SCC, + sourceIdx, sinkIdx); + if (W.first == MAX_WEIGHT) + return ""; + Twine Attrs = Twine("label=\"") + (W.second ? "" : "<") + + Twine(commas(W.first)) + "\""; + return Attrs.str(); + } +}; +} // namespace llvm + +/// This is needed only to get std::distance to work on ChildIteratorType in +/// GraphWriter +namespace std { +template <> +struct iterator_traits::ChildIteratorType> { + using iterator_category = random_access_iterator_tag; + using difference_type = unsigned; +}; +} // namespace std + +/// Write out the current SCC to a file using the \P pass number and the +/// this->sccNum as well as the currenrt function name to generate a file name. +void EdgeWeightDeterminator::dot(unsigned pass, unsigned phase) { + for (NestedSCCT *SCC : SCCList) + dot(SCC, pass, phase); +} + +void EdgeWeightDeterminator::dot(NestedSCCT *SCC, unsigned pass, + unsigned phase) { + std::string Filename = + ("profile." + Twine(pass) + "." + Twine(phase) + "." + + Twine(SCC->SCCBase::number) + "." + F.getName().substr(0, 50) + ".dot") + .str(); + errs() << "Writing '" << Filename << "'..."; + std::error_code EC; + raw_fd_ostream File(Filename, EC, sys::fs::F_Text); + if (EC) { + errs() << " error opening file for writing!\n"; + return; + } + errs() << "\n"; + WeightsDAGPrinter swp{this, SCC}; + GraphWriter writer(File, &swp, true); + writer.writeGraph(); +} +#endif Index: lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- lib/Transforms/IPO/SampleProfile.cpp +++ lib/Transforms/IPO/SampleProfile.cpp @@ -22,6 +22,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/SampleProfile.h" +#include "SampleEdgeWeights.h" // facebook T38406375 + #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -128,6 +130,12 @@ "callsite and function as having 0 samples. Otherwise, treat " "un-sampled callsites and functions conservatively as unknown. ")); +// facebook begin T38406375 +static cl::opt + NewEdgeWeights("new-sample-branch-weights", cl::init(true), cl::Hidden, + cl::desc("Use new algorithm for assigning branch weights")); +// facebook end T38406375 + namespace { using BlockWeightMap = DenseMap; @@ -232,6 +240,7 @@ DominatorTreeBase *DomTree); void propagateWeights(Function &F); + void setBranchWeights(Function &F); // facebook T38406375 uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge); void buildEdges(Function &F); bool propagateThroughEdges(Function &F, bool UpdateBlockCount); @@ -659,15 +668,15 @@ return nullptr; } - StringRef CalleeName; - if (const CallInst *CI = dyn_cast(&Inst)) - if (Function *Callee = CI->getCalledFunction()) - CalleeName = Callee->getName(); - + // facebook begin T33789838 const FunctionSamples *FS = findFunctionSamples(Inst); if (FS == nullptr) return nullptr; + StringRef CalleeName; + if (Function *Callee = CallSite(const_cast(&Inst)).getCalledFunction()) + CalleeName = Callee->getName(); + // facebook end T33789838 return FS->findFunctionSamplesAt(LineLocation(FunctionSamples::getOffset(DIL), DIL->getBaseDiscriminator()), CalleeName); @@ -1291,9 +1300,21 @@ while (Changed && I++ < SampleProfileMaxPropagateIterations) { Changed = propagateThroughEdges(F, true); } + // facebook T38406375 +} + +/// Generate MD_prof metadata for every branch instruction using the +/// edge weights computed during propagation. +void SampleProfileLoader::setBranchWeights(llvm::Function &F) { + + auto getWeight = [this](BasicBlock *x, BasicBlock *y) -> Optional { + auto iter = EdgeWeights.find({x, y}); + if (iter == EdgeWeights.end()) + return {}; + return iter->second; + }; + // facebook end T38406375 - // Generate MD_prof metadata for every branch instruction using the - // edge weights computed during propagation. LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n"); LLVMContext &Ctx = F.getContext(); MDBuilder MDB(Ctx); @@ -1349,9 +1370,12 @@ Instruction *MaxDestInst; for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) { BasicBlock *Succ = TI->getSuccessor(I); - Edge E = std::make_pair(BB, Succ); - uint64_t Weight = EdgeWeights[E]; - LLVM_DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E)); + // facebook begin T38406375 + uint64_t Weight = 0; + if (Optional edgeWeight = getWeight(BB, Succ)) + Weight = *edgeWeight; + LLVM_DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), {BB, Succ})); + // facebook end T38406375 // Use uint32_t saturated arithmetic to adjust the incoming weights, // if needed. Sample counts in profiles are 64-bit unsigned values, // but internally branch weights are expressed as 32-bit values. @@ -1502,14 +1526,23 @@ ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real), &InlinedGUIDs); - // Compute dominance and loop info needed for propagation. - computeDominanceAndLoopInfo(F); - - // Find equivalence classes. - findEquivalenceClasses(F); - - // Propagate weights to all edges. - propagateWeights(F); + if (NewEdgeWeights) { + // This matches a computation in findEquivalenceClasses + uint64_t &entryWeight = BlockWeights[&F.getEntryBlock()]; + entryWeight = std::max(entryWeight, Samples->getHeadSamples() + 1); + EdgeWeights = determineEdgeWeights(F, BlockWeights); + } else { + // Compute dominance and loop info needed for propagation. + computeDominanceAndLoopInfo(F); + + // Find equivalence classes. + findEquivalenceClasses(F); + + // Propagate weights to all edges. + propagateWeights(F); + } + // Set weights in metadata + setBranchWeights(F); } // If coverage checking was requested, compute it now. Index: lib/Transforms/IPO/SubsetCFG.h =================================================================== --- /dev/null +++ lib/Transforms/IPO/SubsetCFG.h @@ -0,0 +1,170 @@ +//===- SubsetCFG.h - subset of CFG abstraction --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// facebook T38406375 +// +// +// + +#ifndef LLVM_LIB_TRANSFORMS_IPO_SUBSETCFG_H +#define LLVM_LIB_TRANSFORMS_IPO_SUBSETCFG_H 1 + +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/GenericDomTree.h" +#include "llvm/Support/GenericDomTreeConstruction.h" + +namespace llvm { + +/// A graph abstraction corresponding to a subset of a control flow graph. +/// This abstraction is complete enought to allow dominator trees to be +/// constructed. +class SubsetCFG { +public: + /// The function which defines the underlying control flow grap + Function &F; + /// A node in the subset with a pointer to the underlying + /// control flow graph with explicit lists of predecessors and + /// successors. + struct NodeT { + BasicBlock *BB; + SubsetCFG *G; + SmallVector predecessors; + SmallVector successors; + + SubsetCFG *getParent() const { return G; } + void printAsOperand(raw_ostream &O, bool) { O << BB->getName(); } + }; + using NodeRef = NodeT *; + + /// All nodes in the subgraph. + std::vector nodes; + + /// Constructor taking a function and a predicate on basic blocks + /// which indicates which blocks are in the subset. The entry node for + /// the function must be in the subset. + template + SubsetCFG(Function &F, Predicate blockInGraph) : F(F) { + initialize(blockInGraph); + } + SubsetCFG(Function &F, const DenseSet &active); + + NodeRef getNode(BasicBlock *BB) { + auto iter = blocks.find(BB); + if (iter == blocks.end()) + return nullptr; + return &nodes[iter->second]; + } + /// Map basic blocks in the subgraph to the index in nodes corresponding + /// to them. + const DenseMap &getBlocks() { return blocks; } + +private: + template void initialize(Pred p); + DenseMap blocks; +}; + +// Initialize a subset graph based on a predicate on basic blocks +// which is true for blocks that are part of the subset. The entry +// block must be in the subset. +template +void SubsetCFG::initialize(Predicate blockInGraph) { + unsigned count = 0; + assert(blockInGraph(F.getEntryBlock())); + for (BasicBlock &BB : F) + if (blockInGraph(BB)) + blocks.try_emplace(&BB, count++); + assert(blocks.size() > 0); + // initialize the node sarray including predecessor and successor lists. + nodes.resize(blocks.size()); + for (auto &pair : blocks) { + NodeT &node = nodes[pair.getSecond()]; + node.BB = pair.getFirst(); + node.G = this; + for (BasicBlock *succ : successors(node.BB)) { + auto iter = blocks.find(succ); + if (iter == blocks.end()) + continue; + NodeT &succ_node = nodes[iter->getSecond()]; + node.successors.push_back(&succ_node); + succ_node.predecessors.push_back(&node); + } + } +} + +SubsetCFG::SubsetCFG(Function &F, const DenseSet &active) : F(F) { + auto blockInGraph = [active](BasicBlock &BB) -> bool { + return active.count(&BB); + }; + initialize(blockInGraph); +} + +template <> struct GraphTraits { + using NodeRef = SubsetCFG::NodeRef; + static NodeRef getEntryNode(SubsetCFG *const G) { return &G->nodes[0]; } + using nodes_iterator = pointer_iterator; + static nodes_iterator nodes_begin(SubsetCFG *G) { + return nodes_iterator(G->nodes.begin()); + } + static nodes_iterator nodes_end(SubsetCFG *G) { + return nodes_iterator(G->nodes.end()); + } +}; + +template <> struct GraphTraits { + using NodeRef = SubsetCFG::NodeRef; + using ChildIteratorType = decltype(SubsetCFG::NodeT::successors)::iterator; + static ChildIteratorType child_begin(NodeRef N) { + return N->successors.begin(); + } + static ChildIteratorType child_end(NodeRef N) { return N->successors.end(); } +}; + +template <> struct GraphTraits> { + using NodeRef = SubsetCFG::NodeRef; + using ChildIteratorType = decltype(SubsetCFG::NodeT::predecessors)::iterator; + static ChildIteratorType child_begin(NodeRef N) { + return N->predecessors.begin(); + } + static ChildIteratorType child_end(NodeRef N) { + return N->predecessors.end(); + } +}; + +/// Dominator tree specialization for SubsetCFG graphs. +template +class SubsetDomTree : public DominatorTreeBase { + SubsetCFG &G; + +public: + using Base = DominatorTreeBase; + using Base::dominates; + using Base::DomTreeNodes; + using Base::Parent; + using Base::reset; + using Base::RootNode; + using Base::Roots; + + SubsetDomTree(SubsetCFG &G) : G(G) { + Base::Parent = &G; + DomTreeBuilder::Calculate(*this); + } + const DomTreeNodeBase *getNodeFromBlock(BasicBlock *BB) { + return Base::getNode(G.getNode(BB)); + } + bool dominates(BasicBlock *x, BasicBlock *y) { + return dominates(G.getNode(x), G.getNode(y)); + } +}; +} // namespace llvm + +#endif Index: test/Transforms/SampleProfile/Inputs/entry_counts.prof =================================================================== --- test/Transforms/SampleProfile/Inputs/entry_counts.prof +++ test/Transforms/SampleProfile/Inputs/entry_counts.prof @@ -1,3 +1,3 @@ empty:100:13293 - 0: 0 + 0: 100 1: 100 Index: test/Transforms/SampleProfile/calls.ll =================================================================== --- test/Transforms/SampleProfile/calls.ll +++ test/Transforms/SampleProfile/calls.ll @@ -48,8 +48,8 @@ store i32 %inc, i32* %i, align 4, !dbg !14 %cmp = icmp slt i32 %0, 400000000, !dbg !14 br i1 %cmp, label %while.body, label %while.end, !dbg !14 -; CHECK: edge while.cond -> while.body probability is 0x77f2798d / 0x80000000 = 93.71% [HOT edge] -; CHECK: edge while.cond -> while.end probability is 0x080d8673 / 0x80000000 = 6.29% +; CHECK: edge while.cond -> while.body probability is 0x7ff49cc2 / 0x80000000 = 99.97% [HOT edge] +; CHECK: edge while.cond -> while.end probability is 0x000b633e / 0x80000000 = 0.03% while.body: ; preds = %while.cond %1 = load i32, i32* %i, align 4, !dbg !16 Index: test/Transforms/SampleProfile/compact-binary-profile.ll =================================================================== --- test/Transforms/SampleProfile/compact-binary-profile.ll +++ test/Transforms/SampleProfile/compact-binary-profile.ll @@ -1,3 +1,4 @@ +; ... the profile data is not very reasonable here ; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -S | FileCheck %s ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.prof -S | FileCheck %s ; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.compactbinary.afdo -S | FileCheck %s @@ -28,7 +29,7 @@ ; CHECK: call i32 (i8*, ...) @printf{{.*}} !prof ![[IDX3:[0-9]*]] ; CHECK: = !{!"TotalCount", i64 10944} ; CHECK: = !{!"MaxCount", i64 5553} -; CHECK: ![[IDX1]] = !{!"branch_weights", i32 5392, i32 163} +; CHECK: ![[IDX1]] = !{!"branch_weights", i32 5392, i32 2} ; CHECK: ![[IDX2]] = !{!"branch_weights", i32 5280, i32 113} ; CHECK: ![[IDX3]] = !{!"branch_weights", i32 1} Index: test/Transforms/SampleProfile/cov-zero-samples.ll =================================================================== --- test/Transforms/SampleProfile/cov-zero-samples.ll +++ test/Transforms/SampleProfile/cov-zero-samples.ll @@ -3,10 +3,7 @@ ; ; CHECK: remark: cov-zero-samples.cc:9:29: Applied 404065 samples from profile (offset: 2.1) ; CHECK: remark: cov-zero-samples.cc:10:9: Applied 443089 samples from profile (offset: 3) -; CHECK: remark: cov-zero-samples.cc:10:36: Applied 0 samples from profile (offset: 3.1) ; CHECK: remark: cov-zero-samples.cc:11:12: Applied 404066 samples from profile (offset: 4) -; CHECK: remark: cov-zero-samples.cc:13:25: Applied 0 samples from profile (offset: 6) -; CHECK: remark: cov-zero-samples.cc:14:3: Applied 0 samples from profile (offset: 7) ; CHECK: remark: cov-zero-samples.cc:10:9: most popular destination for conditional branches at cov-zero-samples.cc:9:3 ; CHECK: remark: cov-zero-samples.cc:11:12: most popular destination for conditional branches at cov-zero-samples.cc:10:9 ; Index: test/Transforms/SampleProfile/fnptr.ll =================================================================== --- test/Transforms/SampleProfile/fnptr.ll +++ test/Transforms/SampleProfile/fnptr.ll @@ -8,12 +8,10 @@ ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/fnptr.prof | opt -analyze -branch-prob | FileCheck %s ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/fnptr.binprof | opt -analyze -branch-prob | FileCheck %s -; CHECK: edge for.body3 -> if.then probability is 0x1a56a56a / 0x80000000 = 20.58% -; CHECK: edge for.body3 -> if.else probability is 0x65a95a96 / 0x80000000 = 79.42% -; CHECK: edge for.inc -> for.inc12 probability is 0x000fbd1c / 0x80000000 = 0.05% -; CHECK: edge for.inc -> for.body3 probability is 0x7ff042e4 / 0x80000000 = 99.95% -; CHECK: edge for.inc12 -> for.end14 probability is 0x04000000 / 0x80000000 = 3.12% -; CHECK: edge for.inc12 -> for.cond1.preheader probability is 0x7c000000 / 0x80000000 = 96.88% +; CHECK: edge for.body3 -> if.then probability is 0x010b8ee1 / 0x80000000 = 0.82% +; CHECK: edge for.body3 -> if.else probability is 0x7ef4711f / 0x80000000 = 99.18% [HOT edge] +; CHECK: edge for.inc12 -> for.end14 probability is 0x55555555 / 0x80000000 = 66.67% +; CHECK: edge for.inc12 -> for.cond1.preheader probability is 0x2aaaaaab / 0x80000000 = 33.33% ; Original C++ test case. ; Index: test/Transforms/SampleProfile/indirect-call-gcc.ll =================================================================== --- test/Transforms/SampleProfile/indirect-call-gcc.ll +++ test/Transforms/SampleProfile/indirect-call-gcc.ll @@ -7,9 +7,9 @@ ; XFAIL: powerpc64-, s390x, mips-, mips64-, sparc define void @test(void ()*) !dbg !3 { - %2 = alloca void ()* + %2 = alloca void ()* store void ()* %0, void ()** %2 - %3 = load void ()*, void ()** %2 + %3 = load void ()*, void ()** %2 ; CHECK: call {{.*}}, !prof ![[PROF:[0-9]+]] call void %3(), !dbg !4 ret void Index: test/Transforms/SampleProfile/inline-coverage.ll =================================================================== --- test/Transforms/SampleProfile/inline-coverage.ll +++ test/Transforms/SampleProfile/inline-coverage.ll @@ -20,7 +20,6 @@ ; CHECK: remark: coverage.cc:9:21: Applied 23478 samples from profile (offset: 2.1) ; CHECK: remark: coverage.cc:10:16: Applied 23478 samples from profile (offset: 3) ; CHECK: remark: coverage.cc:4:10: Applied 31878 samples from profile (offset: 1) -; CHECK: remark: coverage.cc:11:10: Applied 0 samples from profile (offset: 4) ; CHECK: remark: coverage.cc:10:16: most popular destination for conditional branches at coverage.cc:9:3 ; ; There is one sample record with 0 samples at offset 4 in main() that we never Index: test/Transforms/SampleProfile/propagate.ll =================================================================== --- test/Transforms/SampleProfile/propagate.ll +++ test/Transforms/SampleProfile/propagate.ll @@ -100,8 +100,8 @@ %div4 = sdiv i64 %10, 4, !dbg !51 %cmp5 = icmp sgt i64 %9, %div4, !dbg !52 br i1 %cmp5, label %if.then6, label %if.else7, !dbg !53 -; CHECK: edge if.end -> if.then6 probability is 0x5d89d89e / 0x80000000 = 73.08% -; CHECK: edge if.end -> if.else7 probability is 0x22762762 / 0x80000000 = 26.92% +; CHECK: edge if.end -> if.then6 probability is 0x5c1026c3 / 0x80000000 = 71.92% +; CHECK: edge if.end -> if.else7 probability is 0x23efd93d / 0x80000000 = 28.08% if.then6: ; preds = %if.end %11 = load i32, i32* %y.addr, align 4, !dbg !54 @@ -315,3 +315,4 @@ !102 = !DILocation(line: 28, column: 3, scope: !103) !103 = !DILexicalBlockFile(scope: !86, file: !1, discriminator: 2) !104 = !DILocation(line: 29, column: 3, scope: !86) + Index: unittests/ADT/CMakeLists.txt =================================================================== --- unittests/ADT/CMakeLists.txt +++ unittests/ADT/CMakeLists.txt @@ -39,6 +39,7 @@ MakeUniqueTest.cpp MappedIteratorTest.cpp MapVectorTest.cpp + NestedSCCTest.cpp # facebook T38406375 OptionalTest.cpp PackedVectorTest.cpp PointerEmbeddedIntTest.cpp Index: unittests/ADT/NestedSCCTest.cpp =================================================================== --- /dev/null +++ unittests/ADT/NestedSCCTest.cpp @@ -0,0 +1,284 @@ +//===----- llvm/unittest/ADT/NestedSCC.cpp - NestedSCC tests --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// facebook T38406375 + +#include "llvm/ADT/NestedSCC.h" +#include "SimpleGraph.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/RandomNumberGenerator.h" +#include "gtest/gtest.h" +#include + +#define DEBUG_NESTEDSCC_TEST 0 +using namespace llvm; +using namespace simple; +namespace llvm { +namespace { +class SimpleRNG { + uint64_t state; + +public: + SimpleRNG(uint64_t seed) : state(seed) {} + unsigned next(unsigned range) { + state = (state * 1664525 + 1013904223); + return state % range; + } +}; +} // namespace + +using RNG = SimpleRNG; +using NodeType = Graph::NodeType; + +#if DEBUG_NESTEDSCC_TEST +static void dump(const Graph &G) { + dbgs() << "\n"; + for (unsigned idx = 0; idx < G.size(); idx++) { + NodeType *Node = G.getNode(idx); + dbgs() << "node " << idx << " [ " << Node->label() << " ]:"; + for (NodeType *Succ : children(Node)) + dbgs() << " " << Succ->index(); + dbgs() << "\n"; + } +} +#endif + +// Build a random DAG with N nodes with a single +// and a sink exit where every node is on somepath +// from entry to exit +static Graph buildDagGraph(unsigned N, RNG *rng) { + Graph G(N); + for (unsigned i = 1; i < N - 1; i++) { + unsigned pred = rng->next(i); + unsigned succ = i + 1 + rng->next(N - i - 1); + G.addEdge(pred, i); + G.addEdge(i, succ); + } + return G; +} + +using Edge = std::pair; +using BackEdgeSet = DenseSet; + +// expand node Idx into a loop with N nodes. +// Add a back edge into Idx and insert that edge into backEdges +static void makeLoop(Graph &G, unsigned idx, unsigned N, RNG *rng, + DenseMap *loopMap, + BackEdgeSet *backEdges, unsigned loopNumber) { + + // Build a random graph, + // Copy it as a subgraph where we unify its entry node + // with Idx in G. + // The copy of the exit node has a back edge to Idx + // Randomly place original successors from Idx to nodes + // in G. + + // These loops are all reducible which makes it easy + // to identify via labels node which are in the same loop. + // If we create irreducible loops, then they can be treated + // as a nest of loops depending on which node is chosen as + // the "entry" + Graph S = buildDagGraph(N, rng); + + // Save a copy and clear successor for Idx + SmallVector successors(std::move(G.successors(idx))); + + SmallVector Map(N); + Map[0] = idx; + (*loopMap)[idx] = loopNumber; + for (unsigned i = 1; i < N; i++) { + Map[i] = G.addNode()->index(); + (*loopMap)[Map[i]] = loopNumber; + } + + for (unsigned i = 0; i < N - 1; i++) + for (unsigned succ : S.getNode(i)->successors()) + G.addEdge(Map[i], Map[succ]); + + // Randomly select exits which branch to the + // original successors + for (unsigned s : successors) { + unsigned exit = Map[rng->next(N)]; + G.addEdge(exit, s); + // If an edge was a back edge, then that edge + // is no longer an edge (and hence not a backedge) + // but this new edge is a back edge. + if (backEdges->erase({idx, s})) + backEdges->insert({exit, s}); + } + + // Make a loop by adding a back edge + unsigned entryIdx = Map.front(); + unsigned exitIdx = Map.back(); + G.addEdge(exitIdx, entryIdx); + backEdges->insert({exitIdx, entryIdx}); +} + +using SimpleSubGraph = SubGraph>; +template <> struct GraphTraits { + using NodeRef = SimpleSubGraph::NodeRef; + static NodeRef getEntryNode(const SimpleSubGraph &G) { + return G.getEntryNode(); + } + using ChildIteratorType = SimpleSubGraph::ChildIterator; + static ChildIteratorType child_begin(NodeRef N) { + return ChildIteratorType(N); + } + static ChildIteratorType child_end(NodeRef N) { + return ChildIteratorType(GraphTraits::child_end(N.first), + N.second); + } +}; + +using SG = SubGraph>; +using SCCNestT = SCCNest, GraphTraits>; +using NestedSCCT = SCCNestT::NestedSCCT; +using ComponentRef = SCCNestT::ComponentRef; +// Find common ancestor of two SCC's +static NestedSCCT *common(NestedSCCT *x, NestedSCCT *y) { + unsigned xd = x->getDepth(); + unsigned yd = y->getDepth(); + for (; xd > yd; xd -= 1) + x = x->getParent(); + for (; yd > xd; yd -= 1) + y = y->getParent(); + while (x != y) { + assert(x->getDepth() > 0); + x = x->getParent(); + y = y->getParent(); + } + return x; +} +// Map component ref to parent node in "C" +static ComponentRef getParent(ComponentRef r, NestedSCCT *C) { + while (r.first != C) { + r.second = r.first->getParentIndex(); + r.first = r.first->getParent(); + } + return r; +} + +TEST(NestedSCCTest, EnumeratedGraphs) { + // Enumerate graphs with loops: + // Enumerate a DAG with "sqrt(N)" nodes. + // Randomy pick nodes to expand into loops unto + // we reach size N + SimpleRNG RNG(1234); + // Record where we expanded nodes into loops + struct LoopInfo { + unsigned Idx; + unsigned Size; + unsigned ParentLoop; + }; + RNG.next(1); + const unsigned MIN_GRAPH_SIZE = 40; + // Run a number of graphs + for (unsigned i = 0; i < 5; i++) { + SmallVector Loops; + Graph G = buildDagGraph(10, &RNG); + DenseSet isLoopEntry; + DenseMap node2loopMap; + auto getLoop = [&node2loopMap](NodeType *N) { + return node2loopMap.try_emplace(N->index(), 0).first->second; + }; + BackEdgeSet backEdges; + Loops.push_back({0, G.size(), std::numeric_limits::max()}); + + // Pick a node and expand it into a loop. + // Avoid picking the same node twice so that the + // LoopInfo is simple to maintain. + while (G.size() < MIN_GRAPH_SIZE) { + unsigned loopEntry = 1 + RNG.next(G.size() - 2); + if (!isLoopEntry.insert(loopEntry).second) + continue; + unsigned loopSize = 4 + RNG.next(4); + unsigned loopParent = getLoop(G.getNode(loopEntry)); +#if DEBUG_NESTEDSCC_TEST + dbgs() << "loop: " << loopEntry << " size " << loopEntry << " in " + << loopParent << "\n"; +#endif + makeLoop(G, loopEntry, loopSize, &RNG, &node2loopMap, &backEdges, + static_cast(Loops.size())); + Loops.push_back({loopEntry, loopSize, loopParent}); + } +#if DEBUG_NESTEDSCC_TEST + dump(G); +#endif + SCCNestT scc(&G); +#if DEBUG_NESTEDSCC_TEST + scc.dump(dbgs()); +#endif + + // Verification steps... + // First, all nodes in a loop are mapped to the same component + DenseMap labelMap; + for (NodeType &Node : G.nodes()) { + NestedSCCT *C = scc.getComponent(&Node).first; + ASSERT_NE(C, nullptr); // every node is reachable + // All nodes in a component should have the same label... + auto pair = labelMap.try_emplace(C, getLoop(&Node)); + if (!pair.second) { + // All nodes with the same label are in the same component + EXPECT_EQ(pair.first->getSecond(), getLoop(&Node)); + } + NestedSCCT *P = C->getParent(); + if (!P) { + // the root node has label 0 + EXPECT_EQ(getLoop(&Node), 0u); + continue; + } + unsigned parentLabel = Loops[getLoop(&Node)].ParentLoop; + pair = labelMap.try_emplace(P, parentLabel); + if (!pair.second) { + // The parent is the same as in the loop structure + EXPECT_EQ(pair.first->getSecond(), parentLabel); + } + } + + DenseSet visited; + // Verify iteration over and within regions + for (NestedSCCT &C : scc_depth_first(scc)) { + auto iter = labelMap.find(&C); + ASSERT_TRUE(iter != labelMap.end()); + if (NestedSCCT *P = C.getParent()) { + ASSERT_TRUE(visited.count(P)); + } + visited.insert(&C); + unsigned componentLabel = iter->getSecond(); + for (const NestedSCCT::Element &E : C.getSCCs()) { + if (E.getNested()) + continue; + // leaf noes have the same label as the component + EXPECT_EQ(componentLabel, getLoop(E.getBaseNode())); + } + } + + // Verify that SCC nodes are topologically ordered with + // respect to the non-backedge in the underlying graph + for (NodeType &source : G.nodes()) { + unsigned SrcIdx = source.index(); + ComponentRef sourceComponent = scc.getComponent(&source); + for (NodeType *sink : children(&source)) { + // Map source and sink to a common SCC + SCCNestT::ComponentRef sinkComponent = scc.getComponent(sink); + NestedSCCT *commonComponent = + common(sourceComponent.first, sinkComponent.first); + ComponentRef SourceR = getParent(sourceComponent, commonComponent); + ComponentRef SinkR = getParent(sinkComponent, commonComponent); + if (backEdges.count({SrcIdx, sink->index()})) { + // All loops are reducible so back edges go to the entry + EXPECT_EQ(SinkR.second, 0u); + continue; + } + EXPECT_LE(SourceR.second, SinkR.second); + } + } + } +} +} // namespace llvm Index: unittests/ADT/SimpleGraph.h =================================================================== --- /dev/null +++ unittests/ADT/SimpleGraph.h @@ -0,0 +1,176 @@ +//===- llvm/unittest/ADT/TestGraph.h - Graph for testing ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Common unbounded graph data structure for testing. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_UNITTESTS_ADT_TEST_GRAPH_H +#define LLVM_UNITTESTS_ADT_TEST_GRAPH_H + +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/SmallVector.h" +#include +#include +#include +#include + +namespace llvm { +namespace simple { +class Graph { + void assertValid(unsigned Idx) const { + assert(Idx < size() && "Invalid node index!"); + } + using SuccesorVec = SmallVectorImpl; + +public: + /// NodeSubset - A subset of the graph's nodes. + class NodeType { + Graph *G; + /// Index -- is the index of this node in G->Nodes + unsigned Index; + /// Indicies (in G->Nodes) of successors of this node. + SmallVector Successors; + + public: + /// NodeSubset - Default constructor, creates an empty subset. + NodeType(Graph *G, unsigned Index) : G(G), Index(Index) {} + unsigned index() const { return Index; } + + /// AddNode - Add the node with the given index to the subset. + bool addSuccessor(unsigned Index) { + G->assertValid(Index); + for (unsigned S : Successors) + if (S == Index) + return false; + Successors.push_back(Index); + return true; + } + + const SuccesorVec &successors() const { return Successors; } + SuccesorVec &successors() { return Successors; } + }; + +private: + /// Nodes - The list of nodes for this graph. + SmallVector Nodes; + +public: + /// Graph - Default constructor. Creates an empty graph. + Graph(unsigned N) { + for (unsigned i = 0; i != N; ++i) + addNode(); + } + unsigned size() const { return static_cast(Nodes.size()); } + NodeType *addNode() { + unsigned result = size(); + Nodes.emplace_back(this, result); + return getNode(result); + } + + /// addEdge - Add an edge from the node with index FromIdx to the node with + /// index ToIdx. + void addEdge(unsigned FromIdx, unsigned ToIdx) { + assertValid(FromIdx); + Nodes[FromIdx].addSuccessor(ToIdx); + } + + /// getNode - Get a pointer to the node with the given index. + NodeType *getNode(unsigned Idx) const { + assertValid(Idx); + // The constant cast is needed when working with GraphTraits, which insists + // on taking a constant Graph. + return const_cast(&Nodes[Idx]); + } + + /// ChildIterator - Visit all children of a node. + class ChildIterator { + friend class Graph; + + /// FirstNode - Pointer to first node in the graph's Nodes array. + NodeType *FirstNode; + /// Children -- base iterator over succesor indices + using It = SuccesorVec::const_iterator; + It Children; + + ChildIterator(); // Disable default constructor. + protected: + ChildIterator(NodeType *F, It Children) + : FirstNode(F), Children(Children) {} + + public: + /// ChildIterator - Copy constructor. + ChildIterator(const ChildIterator &other) + : FirstNode(other.FirstNode), Children(other.Children) {} + + /// Comparison operators. + bool operator==(const ChildIterator &other) const { + return other.FirstNode == this->FirstNode && + other.Children == this->Children; + } + bool operator!=(const ChildIterator &other) const { + return !(*this == other); + } + + /// Prefix increment operator. + ChildIterator &operator++() { + Children++; + return *this; + } + + /// Postfix increment operator. + ChildIterator operator++(int) { + ChildIterator Result(*this); + ++(*this); + return Result; + } + + /// Dereference operator. + NodeType *operator*() { return FirstNode + *Children; } + }; + + /// child_begin - Return an iterator pointing to the first child of the given + /// node. + static ChildIterator child_begin(NodeType *Parent) { + return ChildIterator(Parent - Parent->index(), + Parent->successors().begin()); + } + + /// child_end - Return the end iterator for children of the given node. + static ChildIterator child_end(NodeType *Parent) { + return ChildIterator(Parent - Parent->index(), Parent->successors().end()); + } + + SuccesorVec &successors(unsigned Idx) { return Nodes[Idx].successors(); } + SmallVectorImpl &nodes() { return Nodes; } +}; + +iterator_range children(Graph::NodeType *Node) { + return {Graph::child_begin(Node), Graph::child_end(Node)}; +} + +} // namespace simple + +template <> struct GraphTraits { + using Graph = simple::Graph; + typedef typename Graph::NodeType *NodeRef; + typedef typename Graph::ChildIterator ChildIteratorType; + + static NodeRef getEntryNode(const Graph *G) { return G->getNode(0); } + static ChildIteratorType child_begin(NodeRef Node) { + return Graph::child_begin(Node); + } + static ChildIteratorType child_end(NodeRef Node) { + return Graph::child_end(Node); + } +}; + +} // End namespace llvm + +#endif