Index: include/llvm/Analysis/BlockFrequencyImpl.h =================================================================== --- include/llvm/Analysis/BlockFrequencyImpl.h +++ /dev/null @@ -1,379 +0,0 @@ -//===-- BlockFrequencyImpl.h - Block Frequency Implementation --*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Shared implementation of BlockFrequency for IR and Machine Instructions. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ANALYSIS_BLOCKFREQUENCYIMPL_H -#define LLVM_ANALYSIS_BLOCKFREQUENCYIMPL_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/Support/BlockFrequency.h" -#include "llvm/Support/BranchProbability.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include -#include - -namespace llvm { - - -class BlockFrequencyInfo; -class MachineBlockFrequencyInfo; - -/// BlockFrequencyImpl implements block frequency algorithm for IR and -/// Machine Instructions. Algorithm starts with value ENTRY_FREQ -/// for the entry block and then propagates frequencies using branch weights -/// from (Machine)BranchProbabilityInfo. LoopInfo is not required because -/// algorithm can find "backedges" by itself. -template -class BlockFrequencyImpl { - - DenseMap Freqs; - - BlockProbInfoT *BPI; - - FunctionT *Fn; - - typedef GraphTraits< Inverse > GT; - - static const uint64_t EntryFreq = 1 << 14; - - std::string getBlockName(BasicBlock *BB) const { - return BB->getName().str(); - } - - std::string getBlockName(MachineBasicBlock *MBB) const { - std::string str; - raw_string_ostream ss(str); - ss << "BB#" << MBB->getNumber(); - - if (const BasicBlock *BB = MBB->getBasicBlock()) - ss << " derived from LLVM BB " << BB->getName(); - - return ss.str(); - } - - void setBlockFreq(BlockT *BB, BlockFrequency Freq) { - Freqs[BB] = Freq; - DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") = "; - printBlockFreq(dbgs(), Freq) << "\n"); - } - - /// getEdgeFreq - Return edge frequency based on SRC frequency and Src -> Dst - /// edge probability. - BlockFrequency getEdgeFreq(BlockT *Src, BlockT *Dst) const { - BranchProbability Prob = BPI->getEdgeProbability(Src, Dst); - return getBlockFreq(Src) * Prob; - } - - /// incBlockFreq - Increase BB block frequency by FREQ. - /// - void incBlockFreq(BlockT *BB, BlockFrequency Freq) { - Freqs[BB] += Freq; - DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") += "; - printBlockFreq(dbgs(), Freq) << " --> "; - printBlockFreq(dbgs(), Freqs[BB]) << "\n"); - } - - // All blocks in postorder. - std::vector POT; - - // Map Block -> Position in reverse-postorder list. - DenseMap RPO; - - // For each loop header, record the per-iteration probability of exiting the - // loop. This is the reciprocal of the expected number of loop iterations. - typedef DenseMap LoopExitProbMap; - LoopExitProbMap LoopExitProb; - - // (reverse-)postorder traversal iterators. 
- typedef typename std::vector::iterator pot_iterator; - typedef typename std::vector::reverse_iterator rpot_iterator; - - pot_iterator pot_begin() { return POT.begin(); } - pot_iterator pot_end() { return POT.end(); } - - rpot_iterator rpot_begin() { return POT.rbegin(); } - rpot_iterator rpot_end() { return POT.rend(); } - - rpot_iterator rpot_at(BlockT *BB) { - rpot_iterator I = rpot_begin(); - unsigned idx = RPO.lookup(BB); - assert(idx); - std::advance(I, idx - 1); - - assert(*I == BB); - return I; - } - - /// isBackedge - Return if edge Src -> Dst is a reachable backedge. - /// - bool isBackedge(BlockT *Src, BlockT *Dst) const { - unsigned a = RPO.lookup(Src); - if (!a) - return false; - unsigned b = RPO.lookup(Dst); - assert(b && "Destination block should be reachable"); - return a >= b; - } - - /// getSingleBlockPred - return single BB block predecessor or NULL if - /// BB has none or more predecessors. - BlockT *getSingleBlockPred(BlockT *BB) { - typename GT::ChildIteratorType - PI = GraphTraits< Inverse >::child_begin(BB), - PE = GraphTraits< Inverse >::child_end(BB); - - if (PI == PE) - return 0; - - BlockT *Pred = *PI; - - ++PI; - if (PI != PE) - return 0; - - return Pred; - } - - void doBlock(BlockT *BB, BlockT *LoopHead, - SmallPtrSet &BlocksInLoop) { - - DEBUG(dbgs() << "doBlock(" << getBlockName(BB) << ")\n"); - setBlockFreq(BB, 0); - - if (BB == LoopHead) { - setBlockFreq(BB, EntryFreq); - return; - } - - if (BlockT *Pred = getSingleBlockPred(BB)) { - if (BlocksInLoop.count(Pred)) - setBlockFreq(BB, getEdgeFreq(Pred, BB)); - // TODO: else? irreducible, ignore it for now. - return; - } - - bool isInLoop = false; - bool isLoopHead = false; - - for (typename GT::ChildIteratorType - PI = GraphTraits< Inverse >::child_begin(BB), - PE = GraphTraits< Inverse >::child_end(BB); - PI != PE; ++PI) { - BlockT *Pred = *PI; - - if (isBackedge(Pred, BB)) { - isLoopHead = true; - } else if (BlocksInLoop.count(Pred)) { - incBlockFreq(BB, getEdgeFreq(Pred, BB)); - isInLoop = true; - } - // TODO: else? irreducible. - } - - if (!isInLoop) - return; - - if (!isLoopHead) - return; - - // This block is a loop header, so boost its frequency by the expected - // number of loop iterations. The loop blocks will be revisited so they all - // get this boost. - typename LoopExitProbMap::const_iterator I = LoopExitProb.find(BB); - assert(I != LoopExitProb.end() && "Loop header missing from table"); - Freqs[BB] /= I->second; - DEBUG(dbgs() << "Loop header scaled to "; - printBlockFreq(dbgs(), Freqs[BB]) << ".\n"); - } - - /// doLoop - Propagate block frequency down through the loop. - void doLoop(BlockT *Head, BlockT *Tail) { - DEBUG(dbgs() << "doLoop(" << getBlockName(Head) << ", " - << getBlockName(Tail) << ")\n"); - - SmallPtrSet BlocksInLoop; - - for (rpot_iterator I = rpot_at(Head), E = rpot_at(Tail); ; ++I) { - BlockT *BB = *I; - doBlock(BB, Head, BlocksInLoop); - - BlocksInLoop.insert(BB); - if (I == E) - break; - } - - // Compute loop's cyclic probability using backedges probabilities. - BlockFrequency BackFreq; - for (typename GT::ChildIteratorType - PI = GraphTraits< Inverse >::child_begin(Head), - PE = GraphTraits< Inverse >::child_end(Head); - PI != PE; ++PI) { - BlockT *Pred = *PI; - assert(Pred); - if (isBackedge(Pred, Head)) - BackFreq += getEdgeFreq(Pred, Head); - } - - // The cyclic probability is freq(BackEdges) / freq(Head), where freq(Head) - // only counts edges entering the loop, not the loop backedges. 
- // The probability of leaving the loop on each iteration is: - // - // ExitProb = 1 - CyclicProb - // - // The Expected number of loop iterations is: - // - // Iterations = 1 / ExitProb - // - uint64_t D = std::max(getBlockFreq(Head).getFrequency(), UINT64_C(1)); - uint64_t N = std::max(BackFreq.getFrequency(), UINT64_C(1)); - if (N < D) - N = D - N; - else - // We'd expect N < D, but rounding and saturation means that can't be - // guaranteed. - N = 1; - - // Now ExitProb = N / D, make sure it fits in an i32/i32 fraction. - assert(N <= D); - if (D > UINT32_MAX) { - unsigned Shift = 32 - countLeadingZeros(D); - D >>= Shift; - N >>= Shift; - if (N == 0) - N = 1; - } - BranchProbability LEP = BranchProbability(N, D); - LoopExitProb.insert(std::make_pair(Head, LEP)); - DEBUG(dbgs() << "LoopExitProb[" << getBlockName(Head) << "] = " << LEP - << " from 1 - "; - printBlockFreq(dbgs(), BackFreq) << " / "; - printBlockFreq(dbgs(), getBlockFreq(Head)) << ".\n"); - } - - friend class BlockFrequencyInfo; - friend class MachineBlockFrequencyInfo; - - BlockFrequencyImpl() { } - - void doFunction(FunctionT *fn, BlockProbInfoT *bpi) { - Fn = fn; - BPI = bpi; - - // Clear everything. - RPO.clear(); - POT.clear(); - LoopExitProb.clear(); - Freqs.clear(); - - BlockT *EntryBlock = fn->begin(); - - std::copy(po_begin(EntryBlock), po_end(EntryBlock), std::back_inserter(POT)); - - unsigned RPOidx = 0; - for (rpot_iterator I = rpot_begin(), E = rpot_end(); I != E; ++I) { - BlockT *BB = *I; - RPO[BB] = ++RPOidx; - DEBUG(dbgs() << "RPO[" << getBlockName(BB) << "] = " << RPO[BB] << "\n"); - } - - // Travel over all blocks in postorder. - for (pot_iterator I = pot_begin(), E = pot_end(); I != E; ++I) { - BlockT *BB = *I; - BlockT *LastTail = 0; - DEBUG(dbgs() << "POT: " << getBlockName(BB) << "\n"); - - for (typename GT::ChildIteratorType - PI = GraphTraits< Inverse >::child_begin(BB), - PE = GraphTraits< Inverse >::child_end(BB); - PI != PE; ++PI) { - - BlockT *Pred = *PI; - if (isBackedge(Pred, BB) && (!LastTail || RPO[Pred] > RPO[LastTail])) - LastTail = Pred; - } - - if (LastTail) - doLoop(BB, LastTail); - } - - // At the end assume the whole function as a loop, and travel over it once - // again. - doLoop(*(rpot_begin()), *(pot_begin())); - } - -public: - - uint64_t getEntryFreq() { return EntryFreq; } - - /// getBlockFreq - Return block frequency. Return 0 if we don't have it. - BlockFrequency getBlockFreq(const BlockT *BB) const { - typename DenseMap::const_iterator - I = Freqs.find(BB); - if (I != Freqs.end()) - return I->second; - return 0; - } - - void print(raw_ostream &OS) const { - OS << "\n\n---- Block Freqs ----\n"; - for (typename FunctionT::iterator I = Fn->begin(), E = Fn->end(); I != E;) { - BlockT *BB = I++; - OS << " " << getBlockName(BB) << " = "; - printBlockFreq(OS, getBlockFreq(BB)) << "\n"; - - for (typename GraphTraits::ChildIteratorType - SI = GraphTraits::child_begin(BB), - SE = GraphTraits::child_end(BB); SI != SE; ++SI) { - BlockT *Succ = *SI; - OS << " " << getBlockName(BB) << " -> " << getBlockName(Succ) - << " = "; printBlockFreq(OS, getEdgeFreq(BB, Succ)) << "\n"; - } - } - } - - void dump() const { - print(dbgs()); - } - - // Utility method that looks up the block frequency associated with BB and - // prints it to OS. - raw_ostream &printBlockFreq(raw_ostream &OS, - const BlockT *BB) { - return printBlockFreq(OS, getBlockFreq(BB)); - } - - raw_ostream &printBlockFreq(raw_ostream &OS, - const BlockFrequency &Freq) const { - // Convert fixed-point number to decimal. 
uint64_t Frequency = Freq.getFrequency();
-    OS << Frequency / EntryFreq << ".";
-    uint64_t Rem = Frequency % EntryFreq;
-    uint64_t Eps = 1;
-    do {
-      Rem *= 10;
-      Eps *= 10;
-      OS << Rem / EntryFreq;
-      Rem = Rem % EntryFreq;
-    } while (Rem >= Eps/2);
-    return OS;
-  }
-
-};
-
-}
-
-#endif
Index: include/llvm/Analysis/BlockFrequencyInfo.h
===================================================================
--- include/llvm/Analysis/BlockFrequencyInfo.h
+++ include/llvm/Analysis/BlockFrequencyInfo.h
@@ -1,4 +1,4 @@
-//===------- BlockFrequencyInfo.h - Block Frequency Analysis --*- C++ -*---===//
+//===- BlockFrequencyInfo.h - Block Frequency Analysis ----------*- C++ -*-===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -22,12 +22,12 @@
 class BranchProbabilityInfo;
 template <class BlockT, class FunctionT, class BranchProbInfoT>
-class BlockFrequencyImpl;
+class BlockFrequencyInfoImpl;
 
-/// BlockFrequencyInfo pass uses BlockFrequencyImpl implementation to estimate
-/// IR basic block frequencies.
+/// BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to
+/// estimate IR basic block frequencies.
 class BlockFrequencyInfo : public FunctionPass {
-  typedef BlockFrequencyImpl<BasicBlock, Function, BranchProbabilityInfo>
+  typedef BlockFrequencyInfoImpl<BasicBlock, Function, BranchProbabilityInfo>
   ImplType;
   std::unique_ptr<ImplType> BFI;
Index: include/llvm/Analysis/BlockFrequencyInfoImpl.h
===================================================================
--- /dev/null
+++ include/llvm/Analysis/BlockFrequencyInfoImpl.h
@@ -0,0 +1,494 @@
+//==- BlockFrequencyInfoImpl.h - Block Frequency Implementation -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Shared implementation of BlockFrequency for IR and Machine Instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H
+#define LLVM_ANALYSIS_BLOCKFREQUENCYINFOIMPL_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BlockMass.h"
+#include "llvm/Support/PositiveFloat.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Debug.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+class BasicBlock;
+class MachineBasicBlock;
+
+/// \brief Base class for BlockFrequencyInfoImpl
+///
+/// BlockFrequencyInfoImplBase has supporting data structures and some
+/// algorithms for BlockFrequencyInfoImpl.  Only algorithms that depend on
+/// the block type (or that call such algorithms) are skipped here.
+///
+/// Nevertheless, the majority of the overall algorithm documentation lives
+/// with BlockFrequencyInfoImpl.  See there for details.
+class BlockFrequencyInfoImplBase {
+public:
+  typedef PositiveFloat<uint64_t> Float;
+
+  /// \brief Representative of a block.
+  ///
+  /// This is a simple wrapper around an index into the reverse-post-order
+  /// traversal of the blocks.
+  ///
+  /// Unlike a block pointer, its order has meaning (location in the
+  /// topological sort) and its class is the same regardless of block type.
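+  ///
+  /// For illustration, with a hypothetical three-block function whose
+  /// reverse-post-order traversal visits Entry, Left, Exit in that order:
+  /// \code
+  ///   BlockNode Entry(0), Left(1), Exit(2); // Index is the RPO position.
+  ///   assert(Entry < Left && Left < Exit);  // Order follows the topology.
+  /// \endcode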
+  struct BlockNode {
+    typedef uint32_t IndexType;
+    IndexType Index;
+
+    bool operator==(const BlockNode &X) const { return Index == X.Index; }
+    bool operator!=(const BlockNode &X) const { return Index != X.Index; }
+    bool operator<=(const BlockNode &X) const { return Index <= X.Index; }
+    bool operator>=(const BlockNode &X) const { return Index >= X.Index; }
+    bool operator<(const BlockNode &X) const { return Index < X.Index; }
+    bool operator>(const BlockNode &X) const { return Index > X.Index; }
+
+    BlockNode() : Index(UINT32_MAX) { }
+    BlockNode(IndexType Index) : Index(Index) { }
+
+    bool isValid() const { return Index <= getMaxIndex(); }
+    static size_t getMaxIndex() { return UINT32_MAX - 1; }
+  };
+
+  /// \brief Stats about a block itself.
+  struct FrequencyData {
+    Float Floating;
+    uint64_t Integer;
+  };
+
+  /// \brief Index of loop information.
+  struct WorkingData {
+    BlockNode LatestBackedge; ///< This block's last backedge.
+    BlockNode ContainingLoop; ///< The block whose loop this block is inside.
+    uint32_t LoopIndex;       ///< Index into PackagedLoops.
+    bool IsPackaged;          ///< Has ContainingLoop been packaged up?
+    bool IsAPackage;          ///< Has this block's loop been packaged up?
+    BlockMass Mass;           ///< Mass distribution from the entry block.
+
+    WorkingData()
+        : LoopIndex(UINT32_MAX), IsPackaged(false), IsAPackage(false) { }
+
+    bool hasLoopHeader() const { return ContainingLoop.isValid(); }
+    bool isLoopHeader() const { return LatestBackedge.isValid(); }
+  };
+
+  /// \brief Unscaled probability weight.
+  ///
+  /// Probability weight for an edge in the graph (including the
+  /// successor/target node).
+  ///
+  /// All edges in the original function are 32-bit.  However, exit edges
+  /// from loop packages are taken from 64-bit exit masses, so we need
+  /// 64-bits of space in general.
+  ///
+  /// In addition to the raw weight amount, Weight stores the type of the
+  /// edge in the current context (i.e., the context of the loop being
+  /// processed).  Is this a local edge within the loop, an exit from the
+  /// loop, or a backedge to the loop header?
+  struct Weight {
+    enum DistType { Local, Exit, Backedge };
+    DistType Type;
+    BlockNode TargetNode;
+    uint64_t Amount;
+    Weight() : Type(Local), Amount(0) { }
+  };
+
+  /// \brief Distribution of unscaled probability weight.
+  ///
+  /// Distribution of unscaled probability weight to a set of successors.
+  ///
+  /// This class collates the successor edge weights for later processing.
+  ///
+  /// \a DidOverflow indicates whether \a Total did overflow while adding to
+  /// the distribution.  It should never overflow twice.  There's no flag
+  /// for whether \a ForwardTotal overflows, since when \a Total exceeds
+  /// 32-bits they both get re-computed during \a normalize().
+  struct Distribution {
+    typedef SmallVector<Weight, 4> WeightList;
+    WeightList Weights;    ///< Individual successor weights.
+    uint64_t Total;        ///< Sum of all weights.
+    bool DidOverflow;      ///< Whether \a Total did overflow.
+    uint32_t ForwardTotal; ///< Total excluding backedges.
+
+    Distribution() : Total(0), DidOverflow(false), ForwardTotal(0) { }
+    void addLocal(const BlockNode &Node, uint64_t Amount) {
+      add(Node, Amount, Weight::Local);
+    }
+    void addExit(const BlockNode &Node, uint64_t Amount) {
+      add(Node, Amount, Weight::Exit);
+    }
+    void addBackedge(const BlockNode &Node, uint64_t Amount) {
+      add(Node, Amount, Weight::Backedge);
+    }
+
+    /// \brief Normalize the distribution.
+    ///
+    /// Combines multiple edges to the same \a Weight::TargetNode and scales
+    /// down so that \a Total fits into 32-bits.
+    ///
+    /// This is linear in the size of \a Weights.  For the vast majority of
+    /// cases, adjacent edge weights are combined by sorting WeightList and
+    /// combining adjacent weights.  However, for very large edge lists an
+    /// auxiliary hash table is used.
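+    ///
+    /// A sketch of typical use, with hypothetical nodes and weights:
+    /// \code
+    ///   Distribution Dist;
+    ///   Dist.addLocal(BlockNode(3), 100); // Forward edge within the loop.
+    ///   Dist.addLocal(BlockNode(3), 28);  // Second edge, same successor.
+    ///   Dist.addExit(BlockNode(7), 10);   // Edge leaving the loop.
+    ///   Dist.normalize(); // Coalesces the edges to node 3; rescales Total.
+    /// \endcode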
+    void normalize();
+  private:
+    void add(const BlockNode &Node, uint64_t Amount, Weight::DistType Type);
+  };
+
+  /// \brief Data for a packaged loop.
+  ///
+  /// Contains the data necessary to represent a loop as a node once it's
+  /// packaged.
+  ///
+  /// PackagedLoopData stores the mass and scale that describe the loop as a
+  /// single node.  Further, it has a list of successors, a list of members,
+  /// and stores the backedge mass assigned to this loop.
+  struct PackagedLoopData {
+    typedef SmallVector<std::pair<BlockNode, BlockMass>, 4> ExitMap;
+    typedef SmallVector<BlockNode, 4> MemberList;
+    ExitMap Exits;          ///< Successor edges (and weights).
+    MemberList Members;     ///< Members of the loop.
+    BlockMass BackedgeMass; ///< Mass returned to loop header.
+    BlockMass Mass;
+    Float Scale;
+  };
+
+  /// \brief Data about each block.  This is used downstream.
+  std::vector<FrequencyData> Freqs;
+
+  /// \brief Loop data: see initializeLoops().
+  std::vector<WorkingData> Working;
+
+  /// \brief Indexed information about packaged loops.
+  std::vector<PackagedLoopData> PackagedLoops;
+
+  /// \brief Create the initial loop packages.
+  ///
+  /// Initializes PackagedLoops using the data in Working about backedges
+  /// and containing loops.  Called by initializeLoops().
+  ///
+  /// \post WorkingData::LoopIndex has been initialized for every loop
+  /// header and PackagedLoopData::Members has been initialized.
+  void initializeLoopPackages();
+
+  /// \brief Add all edges out of a packaged loop to the distribution.
+  ///
+  /// Adds all edges from LocalLoopHead to Dist.  Calls addToDist() to add
+  /// each successor edge.
+  void addLoopSuccessorsToDist(const BlockNode &LoopHead,
+                               const BlockNode &LocalLoopHead,
+                               Distribution &Dist);
+
+  /// \brief Add an edge to the distribution.
+  ///
+  /// Adds an edge to Succ to Dist.  If \c LoopHead.isValid(), then whether
+  /// the edge is forward/exit/backedge is in the context of LoopHead.
+  /// Otherwise, every edge should be a forward edge (since all the loops
+  /// are packaged up).
+  void addToDist(Distribution &Dist, const BlockNode &LoopHead,
+                 const BlockNode &Succ, uint64_t Weight);
+
+  PackagedLoopData &getLoopPackage(const BlockNode &Head) {
+    assert(Head.Index < Working.size());
+    size_t Index = Working[Head.Index].LoopIndex;
+    assert(Index < PackagedLoops.size());
+    return PackagedLoops[Index];
+  }
+
+  /// \brief Distribute mass according to a distribution.
+  ///
+  /// Distributes the mass in Source according to Dist.  If
+  /// LoopHead.isValid(), backedges and exits are stored in its entry in
+  /// PackagedLoops.
+  ///
+  /// Mass is distributed in parallel from two copies of the source mass.
+  ///
+  /// The first mass (forward) represents the distribution of mass through
+  /// the local DAG.  This distribution should lose mass at loop exits and
+  /// ignore backedges.
+  ///
+  /// The second mass (general) represents the behavior of the loop in the
+  /// global context.  In a given distribution from the head, how much mass
+  /// exits, and to where?  How much mass returns to the loop head?
+  ///
+  /// The forward mass should be split up between local successors and
+  /// exits, but only actually distributed to the local successors.
The general mass + /// should be split up between all three types of successors, but distributed + /// only to exits and backedges. + void distributeMass(const BlockNode &Source, const BlockNode &LoopHead, + Distribution &Dist); + + /// \brief Finalize frequency metrics. + /// + /// Unwraps loop packages, calculates final frequencies, and cleans up + /// no-longer-needed data structures. + void finalizeMetrics(); + + /// \brief Clear all memory. + void clear(); + + static std::string getBlockName(const BasicBlock *BB); + static std::string getBlockName(const MachineBasicBlock *MBB); + virtual std::string getBlockName(const BlockNode &Node) const; + + virtual raw_ostream &print(raw_ostream &OS) const { return OS; } + void dump() const { print(dbgs()); } + + Float getFloatingBlockFreq(const BlockNode &Node) const; + + BlockFrequency getBlockFreq(const BlockNode &Node) const; + + raw_ostream &printBlockFreq(raw_ostream &OS, const BlockNode &Node) const; + raw_ostream & + printBlockFreq(raw_ostream &OS, const BlockFrequency &Freq) const; + + uint64_t getEntryFreq() const { + assert(!Freqs.empty()); + return Freqs[0].Integer; + } + /// \brief Virtual destructor. + /// + /// Need a virtual destructor to mask the compiler warning about + /// getBlockName. + virtual ~BlockFrequencyInfoImplBase() { } +}; + +/// \brief Shared implementation for block frequency analysis. +/// +/// This is a shared implementation of BlockFrequencyInfo and +/// MachineBlockFrequencyInfo, and calculates the relative frequencies of +/// blocks. +/// +/// This algorithm leverages BlockMass and PositiveFloat to maintain precision, +/// separates mass distribution from loop scaling, and dithers to eliminate +/// probability mass loss. +/// +/// The implementation is split between BlockFrequencyInfoImpl, which knows the +/// type of graph being modelled (BasicBlock vs. MachineBasicBlock), and +/// BlockFrequencyInfoImplBase, which doesn't. The base class uses \a +/// BlockNode, a wrapper around a uint32_t. BlockNode is numbered from 0 in +/// reverse-post order. This gives two advantages: it's easy to compare the +/// relative ordering of two nodes, and maps keyed on BlockT can be represented +/// by vectors. +/// +/// This algorithm is O(V+E), unless there is irreducible control flow, in +/// which case it's O(V*E) in the worst case. +/// +/// These are the main stages: +/// +/// 0. Reverse post-order traversal (\a initializeRPOT()). +/// +/// Run a single post-order traversal and save it (in reverse) in RPOT. +/// All other stages make use of this ordering. Save a lookup from BlockT +/// to BlockNode (the index into RPOT) in Nodes. +/// +/// 1. Loop indexing (\a initializeLoops()). +/// +/// Indexing loops gives a light-weight description of all loops in the +/// graph. Each node lists its containing loop (if any), and its latest +/// backedge (if any). This is stored in Working, a set of data about a +/// Node that's discarded at the end of the algorithm. +/// +/// A single reverse post-order traversal builds this index. For each +/// node, visit each predecessor. Save the latest backedge predecessor as +/// the loop's latest backedge. Set the containing loop to the same as the +/// latest forward-edge predecessor's containing loop, and store the +/// current node in that loop's list of immediate members. +/// +/// A second pass (\a initializeLoopPackages()) assigns a loop index to +/// each loop header, which is used as an index into \a PackagedLoops. 
A
+///     final pass stores the immediate members of each loop in \a
+///     PackagedLoops.  Since only immediate members are stored, the memory
+///     complexity is O(V).
+///
+///  2. Calculate mass and scale in loops (\a computeMassInLoops()).
+///
+///     For each loop (bottom-up), distribute mass through the DAG resulting
+///     from ignoring backedges and treating sub-loops as a single
+///     pseudo-node.  Track the backedge mass distributed to the loop
+///     header, and use it to calculate the loop scale (number of loop
+///     iterations).
+///
+///     Visiting loops bottom-up is a post-order traversal of loop headers.
+///     For each loop, immediate members that represent sub-loops will
+///     already have been visited and packaged into a pseudo-node.
+///
+///     Distributing mass in a loop is a reverse-post-order traversal
+///     through the loop.  Start by assigning full mass to the Loop header.
+///     For each node in the loop:
+///
+///         - Fetch and categorize the weight distribution for its
+///           successors.  If this is a packaged sub-loop, the weight
+///           distribution is stored in \a PackagedLoopData::Exits.
+///           Otherwise, fetch it from BranchProbabilityInfo.
+///
+///         - Each successor is categorized as \a Weight::Local, a normal
+///           forward edge within the current loop, \a Weight::Backedge, a
+///           backedge to the loop header, or \a Weight::Exit, any successor
+///           outside the loop.  The weight, the successor, and its category
+///           are stored in \a Distribution.  There can be multiple edges to
+///           each successor.
+///
+///         - Normalize the distribution:  scale weights down so that their
+///           sum is 32-bits, and coalesce multiple edges to the same node.
+///
+///         - Distribute the mass accordingly, dithering to minimize mass
+///           loss, as described in \a distributeMass().  Mass is
+///           distributed in parallel in two ways: forward, and general.
+///           Local successors take their mass from the forward mass, while
+///           exit and backedge successors take their mass from the general
+///           mass.  Additionally, exit edges use up (ignored) mass from the
+///           forward mass, and local edges use up (ignored) mass from the
+///           general distribution.
+///
+///     Finally, calculate the loop scale from the accumulated backedge
+///     mass.
+///
+///  3. Distribute mass in the function (\a computeMassInFunction()).
+///
+///     Finally, distribute mass through the DAG resulting from packaging
+///     all loops in the function.  This uses the same algorithm as
+///     distributing mass in a loop, except that there are no exit or
+///     backedge edges.
+///
+///  4. Loop unpackaging and cleanup (\a finalizeMetrics()).
+///
+///     Initialize the frequency to a floating point representation of its
+///     mass.
+///
+///     Visit loops top-down (reverse post-order), scaling the loop header's
+///     frequency by its pseudo-node's mass and loop scale.  Keep track of
+///     the minimum and maximum final frequencies.
+///
+///     Using the min and max frequencies as a guide, translate floating
+///     point frequencies to an appropriate range in uint64_t.
+///
+/// It has some known flaws.
+///
+///   - Irreducible control flow isn't modelled correctly.  In particular,
+///     loops with multiple headers will be modelled asymmetrically, as if
+///     there is one loop within another (the irreducible edges aren't
+///     ignored, but the model is still incorrect).  If such loops also have
+///     multiple exit blocks, frequencies can be distorted past the end of
+///     the loop.
+///
+///   - Loop scale is limited to 4096 per loop (2^12) to avoid exhausting
+///     the 64-bit integer precision used downstream.
+///
+///   - LoopInfo/MachineLoopInfo and Loop/MachineLoop are *not* used.
+///     Ideally the loop infrastructure would be shared.
+template <class BT, class FT, class PT>
+class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase {
+  typedef BT BlockT;
+  typedef FT FunctionT;
+  typedef PT BlockProbInfoT;
+
+  typedef GraphTraits<const BlockT *> Successor;
+  typedef GraphTraits<Inverse<const BlockT *> > Predecessor;
+
+  const BlockProbInfoT *BPI;
+  const FunctionT *F;
+
+  // All blocks in reverse postorder.
+  std::vector<const BlockT *> RPOT;
+  DenseMap<const BlockT *, BlockNode> Nodes;
+
+  typedef typename std::vector<const BlockT *>::const_iterator
+  rpot_iterator;
+  typedef typename std::vector<const BlockT *>::const_reverse_iterator
+  reverse_iterator;
+
+  rpot_iterator rpot_begin() const { return RPOT.begin(); }
+  rpot_iterator rpot_end() const { return RPOT.end(); }
+  reverse_iterator rpot_rbegin() const { return RPOT.rbegin(); }
+  reverse_iterator rpot_rend() const { return RPOT.rend(); }
+
+  rpot_iterator getIterator(const reverse_iterator &I) const {
+    return rpot_begin() + getIndex(I);
+  }
+
+  size_t getIndex(const rpot_iterator &I) const {
+    return I - rpot_begin();
+  }
+  size_t getIndex(const reverse_iterator &I) const {
+    return RPOT.size() - (I - rpot_rbegin()) - 1;
+  }
+
+  BlockNode getNode(const rpot_iterator &I) const {
+    return BlockNode(getIndex(I));
+  }
+  BlockNode getNode(const reverse_iterator &I) const {
+    return BlockNode(getIndex(I));
+  }
+  BlockNode getNode(const BlockT *BB) const {
+    return Nodes.lookup(BB);
+  }
+
+  const BlockT *getBlock(const BlockNode &Node) const {
+    return RPOT[Node.Index];
+  }
+
+  void initializeRPOT(const FunctionT *F);
+  void initializeLoops();
+  void runOnFunction(const FunctionT *F);
+
+  void propagateMassToSuccessors(const BlockNode &LoopHead,
+                                 const BlockNode &Node);
+  void computeMassInLoops();
+  void computeMassInLoop(const BlockNode &LoopHead);
+  void computeMassInFunction();
+
+  using BlockFrequencyInfoImplBase::getBlockName;
+  std::string getBlockName(const BlockNode &Node) const override {
+    return getBlockName(getBlock(Node));
+  }
+
+public:
+  const FunctionT *getFunction() const { return F; }
+
+  void doFunction(const FunctionT *F, const BlockProbInfoT *BPI);
+  BlockFrequencyInfoImpl() : BPI(0), F(0) { }
+
+  using BlockFrequencyInfoImplBase::getEntryFreq;
+  BlockFrequency getBlockFreq(const BlockT *BB) const {
+    return BlockFrequencyInfoImplBase::getBlockFreq(getNode(BB));
+  }
+  Float getFloatingBlockFreq(const BlockT *BB) const {
+    return BlockFrequencyInfoImplBase::getFloatingBlockFreq(getNode(BB));
+  }
+
+  /// \brief Print the frequencies for the current function.
+  ///
+  /// Prints the frequencies for the blocks in the current function.
+  ///
+  /// Blocks are printed in the natural iteration order of the function,
+  /// rather than reverse post-order.  This provides two advantages: writing
+  /// -analyze tests is easier (since blocks come out in source order), and
+  /// even unreachable blocks are printed.
+  raw_ostream &print(raw_ostream &OS) const override;
+  using BlockFrequencyInfoImplBase::dump;
+
+  using BlockFrequencyInfoImplBase::printBlockFreq;
+  raw_ostream &printBlockFreq(raw_ostream &OS, const BlockT *BB) const {
+    return BlockFrequencyInfoImplBase::printBlockFreq(OS, getNode(BB));
+  }
+};
+
+}
+
+#endif
Index: include/llvm/CodeGen/MachineBlockFrequencyInfo.h
===================================================================
--- include/llvm/CodeGen/MachineBlockFrequencyInfo.h
+++ include/llvm/CodeGen/MachineBlockFrequencyInfo.h
@@ -1,4 +1,4 @@
-//====-- MachineBlockFrequencyInfo.h - MBB Frequency Analysis -*- C++ -*--====//
+//===- MachineBlockFrequencyInfo.h - MBB Frequency Analysis -*- C++ -*-----===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -23,12 +23,12 @@
 class MachineBasicBlock;
 class MachineBranchProbabilityInfo;
 template <class BlockT, class FunctionT, class BranchProbInfoT>
-class BlockFrequencyImpl;
+class BlockFrequencyInfoImpl;
 
-/// MachineBlockFrequencyInfo pass uses BlockFrequencyImpl implementation to estimate
-/// machine basic block frequencies.
+/// MachineBlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation
+/// to estimate machine basic block frequencies.
 class MachineBlockFrequencyInfo : public MachineFunctionPass {
-  typedef BlockFrequencyImpl<MachineBasicBlock, MachineFunction,
-                             MachineBranchProbabilityInfo>
+  typedef BlockFrequencyInfoImpl<MachineBasicBlock, MachineFunction,
+                                 MachineBranchProbabilityInfo>
   ImplType;
   std::unique_ptr<ImplType> MBFI;
Index: include/llvm/Support/BlockMass.h
===================================================================
--- /dev/null
+++ include/llvm/Support/BlockMass.h
@@ -0,0 +1,211 @@
+//===- BlockMass.h - Block mass ---------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements BlockMass.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_BLOCKMASS_H
+#define LLVM_SUPPORT_BLOCKMASS_H
+
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/type_traits.h"
+
+namespace llvm {
+
+template <class DigitsT>
+class PositiveFloat;
+
+class BranchProbability;
+class raw_ostream;
+
+/// \brief Mass of a block.
+///
+/// This class implements a sort of fixed-point fraction always between 0.0
+/// and 1.0.  getMass() == UINT64_MAX indicates a value of 1.0.
+///
+/// Masses can be added and subtracted.  Simple saturation arithmetic is
+/// used, so arithmetic operations never overflow or underflow.
+///
+/// Masses can be multiplied.  Multiplication treats full mass as 1.0 and
+/// uses an inexpensive floating-point algorithm that's off-by-one (almost,
+/// but not quite, maximum precision).
+///
+/// Masses can be scaled by \a BranchProbability at maximum precision.
+class BlockMass {
+  uint64_t Mass;
+
+public:
+  BlockMass() : Mass(0) { }
+  explicit BlockMass(uint64_t Mass) : Mass(Mass) { }
+
+  static BlockMass getEmpty() { return BlockMass(); }
+  static BlockMass getFull() { return BlockMass(UINT64_MAX); }
+
+  uint64_t getMass() const { return Mass; }
+
+  bool isFull() const { return Mass == UINT64_MAX; }
+  bool isEmpty() const { return !Mass; }
+
+  bool operator!() const { return isEmpty(); }
+
+  /// \brief Add another mass.
+  ///
+  /// Adds another mass, saturating at \a isFull() rather than overflowing.
+  BlockMass &operator+=(const BlockMass &X) {
+    uint64_t Sum = Mass + X.Mass;
+    Mass = Sum < Mass ? UINT64_MAX : Sum;
+    return *this;
+  }
+
+  /// \brief Subtract another mass.
+  ///
+  /// Subtracts another mass, saturating at \a isEmpty() rather than
+  /// underflowing.
+  BlockMass &operator-=(const BlockMass &X) {
+    uint64_t Diff = Mass - X.Mass;
+    Mass = Diff > Mass ? 0 : Diff;
+    return *this;
+  }
+
+  /// \brief Scale by another mass.
+  ///
+  /// The current implementation is a little imprecise, but it's relatively
+  /// fast, never overflows, and maintains the property that 1.0*1.0==1.0
+  /// (where isFull represents the number 1.0).  It's an approximation of
+  /// 128-bit multiply that gets right-shifted by 64-bits.
+  ///
+  /// For a given digit size, multiplying two-digit numbers looks like:
+  ///
+  ///                  U1 .    L1
+  ///                * U2 .    L2
+  ///                ============
+  ///           0 .       . L1*L2
+  ///     +     0 . U1*L2 .     0 // (shift left once by a digit-size)
+  ///     +     0 . U2*L1 .     0 // (shift left once by a digit-size)
+  ///     + U1*U2 .     0 .     0 // (shift left twice by a digit-size)
+  ///
+  /// BlockMass has 64-bit numbers.  Split each into two 32-bit digits, each
+  /// stored in a 64-bit variable.  Add 1 to the lower digits, to model
+  /// isFull as 1.0; this won't overflow, since we have 64-bit storage for
+  /// each digit.
+  ///
+  /// To do this accurately, (a) multiply into two 64-bit digits,
+  /// incrementing the upper digit on overflows of the lower digit (carry),
+  /// (b) subtract 1 from the lower digit, decrementing the upper digit on
+  /// underflow (carry), and (c) truncate the lower digit.  For the 1.0*1.0
+  /// case, the upper digit will be 0 at the end of step (a), and then will
+  /// underflow back to isFull (1.0) in step (b).
+  ///
+  /// Instead, the implementation does something a little faster with a
+  /// small loss of accuracy: ignore the lower 64-bit digit entirely.  The
+  /// loss of accuracy is small, since the sum of the unmodelled carries is
+  /// 0 or 1 (i.e., step (a) will overflow at most once, and step (b) will
+  /// underflow only if step (a) overflows).
+  ///
+  /// This is the formula we're calculating:
+  ///
+  ///     U1.L1 * U2.L2 == U1 * U2 + (U1 * (L2+1))>>32 + (U2 * (L1+1))>>32
+  ///
+  /// As a demonstration of 1.0*1.0, consider two 4-bit numbers that are
+  /// both full (1111).
+  ///
+  ///     U1.L1 * U2.L2 == U1 * U2 + (U1 * (L2+1))>>2 + (U2 * (L1+1))>>2
+  ///     11.11 * 11.11 == 11 * 11 + (11 * (11+1))/4 + (11 * (11+1))/4
+  ///                   == 1001 + (11 * 100)/4 + (11 * 100)/4
+  ///                   == 1001 + 1100/4 + 1100/4
+  ///                   == 1001 + 0011 + 0011
+  ///                   == 1111
+  BlockMass &operator*=(const BlockMass &X) {
+    uint64_t U1 = Mass >> 32, L1 = Mass & UINT32_MAX,
+             U2 = X.Mass >> 32, L2 = X.Mass & UINT32_MAX;
+    Mass = U1 * U2 + (U1 * (L2+1) >> 32) + ((L1+1) * U2 >> 32);
+    return *this;
+  }
+
+  /// \brief Multiply by a branch probability.
+  ///
+  /// Multiply by P.  Guarantees full precision.
+  ///
+  /// This could be naively implemented by multiplying by the numerator and
+  /// dividing by the denominator, but in what order?  Multiplying first can
+  /// overflow, while dividing first will lose precision (potentially,
+  /// changing a non-zero mass to zero).
+  ///
+  /// The implementation mixes the two methods.  Since \a BranchProbability
+  /// uses 32-bits and \a BlockMass 64-bits, shift the mass as far to the
+  /// left as there is room, then divide by the denominator to get a
+  /// quotient.  Multiplying by the numerator and right shifting gives a
+  /// first approximation.
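+  ///
+  /// A sketch of that first approximation, for a non-zero mass and
+  /// probability N/D (hypothetical variable names, ignoring the error
+  /// correction described below):
+  /// \code
+  ///   int Shift = countLeadingZeros64(Mass);  // Room to shift left.
+  ///   uint64_t Q = (Mass << Shift) / D;       // Divide at full precision.
+  ///   uint64_t Approx = (Q * N) >> Shift;     // First approximation.
+  /// \endcode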
+  ///
+  /// Calculate the error in this first approximation by calculating the
+  /// opposite mass (multiply by the opposite numerator and shift) and
+  /// subtracting both from the original mass.
+  ///
+  /// Add to the first approximation the correct fraction of this error
+  /// value.  This time, multiply first and then divide, since there is no
+  /// danger of overflow.
+  ///
+  /// \pre P represents a fraction between 0.0 and 1.0.
+  BlockMass &operator*=(const BranchProbability &P);
+
+  bool operator==(const BlockMass &X) const {
+    return Mass == X.Mass;
+  }
+  bool operator<(const BlockMass &X) const {
+    return Mass < X.Mass;
+  }
+  bool operator!=(const BlockMass &X) const {
+    return !(*this == X);
+  }
+  bool operator>(const BlockMass &X) const {
+    return X < *this;
+  }
+  bool operator<=(const BlockMass &X) const {
+    return !(*this > X);
+  }
+  bool operator>=(const BlockMass &X) const {
+    return !(*this < X);
+  }
+
+  /// \brief Convert to floating point.
+  ///
+  /// Convert to a float.  \a isFull() gives 1.0, while \a isEmpty() gives
+  /// slightly above 0.0.
+  PositiveFloat<uint64_t> toFloat() const;
+
+  void dump() const;
+  raw_ostream &print(raw_ostream &OS) const;
+};
+
+inline BlockMass operator+(const BlockMass &L, const BlockMass &R) {
+  return BlockMass(L) += R;
+}
+inline BlockMass operator-(const BlockMass &L, const BlockMass &R) {
+  return BlockMass(L) -= R;
+}
+inline BlockMass operator*(const BlockMass &L, const BlockMass &R) {
+  return BlockMass(L) *= R;
+}
+inline BlockMass operator*(const BlockMass &L, const BranchProbability &R) {
+  return BlockMass(L) *= R;
+}
+inline BlockMass operator*(const BranchProbability &L, const BlockMass &R) {
+  return BlockMass(R) *= L;
+}
+
+inline raw_ostream &operator<<(raw_ostream &OS, const BlockMass &X) {
+  return X.print(OS);
+}
+
+template <> struct isPodLike<BlockMass> { static const bool value = true; };
+
+}
+
+#endif
Index: include/llvm/Support/PositiveFloat.h
===================================================================
--- /dev/null
+++ include/llvm/Support/PositiveFloat.h
@@ -0,0 +1,896 @@
+//===- PositiveFloat.h - Simple positive floating point ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements positive floating point numbers.  It uses simple
+// saturation arithmetic, and every operation is well-defined for every
+// value.
+//
+// The number is split into a signed exponent and unsigned digits.  There is
+// no canonical form, so the same number can be represented by many bit
+// representations.
+//
+// It's templated on the underlying integer type for digits, which is
+// expected to be one of uint64_t, uint32_t, uint16_t or uint8_t.
+//
+// Unlike builtin floating point types, PositiveFloat is portable.
+//
+// Unlike APFloat, PositiveFloat does not model architecture floating point
+// behaviour.  (This should make it a little faster.)
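+//
+// For illustration (hypothetical values): PositiveFloat<uint8_t>(255, 0)
+// represents 255, and shifting left with <<= 2 just bumps the exponent,
+// giving digits 255 and exponent 2, i.e. the value 1020.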
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_POSITIVEFLOAT_H
+#define LLVM_SUPPORT_POSITIVEFLOAT_H
+
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/type_traits.h"
+
+#include <algorithm>
+#include <cassert>
+#include <limits>
+#include <string>
+#include <utility>
+
+namespace llvm {
+
+class raw_ostream;
+
+class PositiveFloatBase {
+public:
+  static const int MaxExponent = 16383;
+  static const int MinExponent = -16382;
+  static const int DefaultPrecision = 10;
+
+  static void dump(uint64_t D, int16_t E, int Width);
+  static raw_ostream &print(raw_ostream &OS, uint64_t D, int16_t E, int Width,
+                            unsigned Precision);
+  static std::string toString(uint64_t D, int16_t E, int Width,
+                              unsigned Precision);
+  static int countLeadingZeros32(uint32_t N) {
+    return countLeadingZeros(N);
+  }
+  static int countLeadingZeros64(uint64_t N) {
+    return countLeadingZeros(N);
+  }
+  static uint64_t getHalf(uint64_t N) {
+    return (N >> 1) + (N & 1);
+  }
+
+  static std::pair<uint64_t, bool> splitSigned(int64_t N) {
+    if (N >= 0)
+      return std::make_pair(N, false);
+    if (N == INT64_MIN)
+      return std::make_pair(uint64_t(N) + 1, true);
+    return std::make_pair(-N, true);
+  }
+  static int64_t joinSigned(uint64_t U, bool IsNeg) {
+    if (U > INT64_MAX)
+      return IsNeg ? INT64_MIN : INT64_MAX;
+    return IsNeg ? -int64_t(U) : int64_t(U);
+  }
+
+  static int32_t extractLg(const std::pair<int32_t, int> &Lg) {
+    return Lg.first;
+  }
+  static int32_t extractLgFloor(const std::pair<int32_t, int> &Lg) {
+    return Lg.first - (Lg.second > 0);
+  }
+  static int32_t extractLgCeiling(const std::pair<int32_t, int> &Lg) {
+    return Lg.first + (Lg.second < 0);
+  }
+  static uint64_t getDiff(int16_t L, int16_t R) {
+    assert(L <= R && "arguments in wrong order");
+    return (uint64_t)R - (uint64_t)L;
+  }
+
+  static std::pair<uint64_t, int16_t> divide64(uint64_t L, uint64_t R);
+  static std::pair<uint64_t, int16_t> multiply64(uint64_t L, uint64_t R);
+
+  static int compare(uint64_t L, uint64_t R, int Shift) {
+    assert(Shift >= 0);
+    assert(Shift < 64);
+
+    uint64_t L_adjusted = L >> Shift;
+    if (L_adjusted < R)
+      return -1;
+    if (L_adjusted > R)
+      return 1;
+
+    return L > L_adjusted << Shift ? 1 : 0;
+  }
+};
+
+/// \brief Simple representation of a positive floating point.
+///
+/// PositiveFloat is a positive floating point number.  It uses simple
+/// saturation arithmetic, and every operation is well-defined for every
+/// value.
+///
+/// The number is split into a signed exponent and unsigned digits.  The
+/// number represented is \c getDigits()*2^getExponent().  In this way, the
+/// digits are much like the mantissa in the x87 long double, but there is
+/// no canonical form, so the same number can be represented by many bit
+/// representations (it's always in "denormal" mode).
+///
+/// PositiveFloat is templated on the underlying integer type for digits,
+/// which is expected to be one of uint64_t, uint32_t, uint16_t or uint8_t.
+///
+/// Unlike builtin floating point types, PositiveFloat is portable.
+///
+/// Unlike APFloat, PositiveFloat does not model architecture floating point
+/// behaviour (this should make it a little faster), and implements most
+/// operators (this makes it usable).
+///
+/// PositiveFloat is totally ordered.  However, there is no canonical form,
+/// so there are multiple representations of most scalars.  E.g.:
+///
+///     PositiveFloat(8u, 0) == PositiveFloat(4u, 1)
+///     PositiveFloat(4u, 1) == PositiveFloat(2u, 2)
+///     PositiveFloat(2u, 2) == PositiveFloat(1u, 3)
+///
+/// PositiveFloat implements most arithmetic operations.
Precision is kept +/// where possible. Uses simple saturation arithmetic, so that operations +/// saturate to 0.0 or getLargest() rather than under or overflowing. It has +/// some extra arithmetic for unit inversion. 0.0/0.0 is defined to be 0.0. +/// Any other division by 0.0 is defined to be getLargest(). +/// +/// As a convenience for modifying the exponent, left and right shifting are +/// both implemented, and both interpret negative shifts as positive shifts in +/// the opposite direction. +/// +/// Future work might extract most of the implementation into a base class +/// (e.g., \c Float) that has an \c IsSigned template parameter. The initial +/// use case for this only needed positive semantics, but it wouldn't take much +/// work to extend. +/// +/// Exponents are limited to the range accepted by x87 long double. This makes +/// it trivial to add functionality to convert to APFloat (this is already +/// relied on for the implementation of printing). +template +class PositiveFloat : PositiveFloatBase { +public: + static_assert(!std::numeric_limits::is_signed, + "only unsigned floats supported"); + + typedef DigitsT DigitsType; + +private: + typedef std::numeric_limits DigitsLimits; + + static const int Width = sizeof(DigitsType) * 8; + static_assert(Width <= 64, "invalid integer width for digits"); + +private: + DigitsType Digits; + int16_t Exponent; + +public: + PositiveFloat() : Digits(0), Exponent(0) { } + + PositiveFloat(DigitsType Digits, int16_t Exponent) + : Digits(Digits), Exponent(Exponent) { } + +private: + PositiveFloat(const std::pair &X) + : Digits(X.first), Exponent(X.second) { } + +public: + static PositiveFloat getZero() { + return PositiveFloat(0, 0); + } + static PositiveFloat getOne() { + return PositiveFloat(1, 0); + } + static PositiveFloat getLargest() { + return PositiveFloat(DigitsLimits::max(), MaxExponent); + } + static PositiveFloat getFloat(uint64_t N) { + return adjustToWidth(N, 0); + } + static PositiveFloat getInverseFloat(uint64_t N) { + return getFloat(N).invert(); + } + static PositiveFloat getFraction(DigitsType N, DigitsType D) { + return getQuotient(N, D); + } + + int16_t getExponent() const { + return Exponent; + } + DigitsType getDigits() const { + return Digits; + } + + template + IntT toInt() const; + + bool isZero() const { + return !Digits; + } + bool isLargest() const { + return *this == getLargest(); + } + bool isOne() const { + if (Exponent > 0 || Exponent <= -Width) + return false; + return Digits == DigitsType(1) << -Exponent; + } + + /// \brief The log base 2, rounded. + /// + /// Get the lg of the scalar. lg 0 is defined to be INT32_MIN. + int32_t lg() const { + return extractLg(lgImpl()); + } + + /// \brief The log base 2, rounded towards INT32_MIN. + /// + /// Get the lg floor. lg 0 is defined to be INT32_MIN. + int32_t lgFloor() const { + return extractLgFloor(lgImpl()); + } + + /// \brief The log base 2, rounded towards INT32_MAX. + /// + /// Get the lg ceiling. lg 0 is defined to be INT32_MIN. 
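+  ///
+  /// For example (hypothetical): PositiveFloat<uint8_t>(3, 1) represents 6,
+  /// so lgFloor() == 2, lgCeiling() == 3, and lg() rounds up to 3.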
+ int32_t lgCeiling() const { + return extractLgCeiling(lgImpl()); + } + + bool operator==(const PositiveFloat &X) const { + return compare(X) == 0; + } + bool operator<(const PositiveFloat &X) const { + return compare(X) < 0; + } + bool operator!=(const PositiveFloat &X) const { + return compare(X) != 0; + } + bool operator>(const PositiveFloat &X) const { + return compare(X) > 0; + } + bool operator<=(const PositiveFloat &X) const { + return compare(X) <= 0; + } + bool operator>=(const PositiveFloat &X) const { + return compare(X) >= 0; + } + + bool operator!() const { + return isZero(); + } + + /// \brief Convert to a decimal representation in a string. + /// + /// Convert to a string. Uses scientific notation for very large/small + /// numbers. Scientific notation is used roughly for numbers outside of the + /// range 2^-64 through 2^64. + /// + /// \c Precision indicates the number of decimal digits of precision to use; + /// 0 requests the maximum available. + /// + /// As a special case to make debugging easier, if the number is small enough + /// to convert without scientific notation and has more than \c Precision + /// digits before the decimal place, it's printed accurately to the first + /// digit past zero. E.g., assuming 10 digits of precision: + /// + /// 98765432198.7654... => 98765432198.8 + /// 8765432198.7654... => 8765432198.8 + /// 765432198.7654... => 765432198.8 + /// 65432198.7654... => 65432198.77 + /// 5432198.7654... => 5432198.765 + std::string toString(unsigned Precision = DefaultPrecision) { + return PositiveFloatBase::toString(Digits, Exponent, Width, Precision); + } + + /// \brief Print a decimal representation. + /// + /// Print a string. See toString for documentation. + raw_ostream &print(raw_ostream &OS, + unsigned Precision = DefaultPrecision) const { + return PositiveFloatBase::print(OS, Digits, Exponent, Width, Precision); + } + void dump() const { + return PositiveFloatBase::dump(Digits, Exponent, Width); + } + + PositiveFloat &operator+=(const PositiveFloat &X); + PositiveFloat &operator-=(const PositiveFloat &X); + PositiveFloat &operator*=(const PositiveFloat &X); + PositiveFloat &operator/=(const PositiveFloat &X); + PositiveFloat &operator<<=(int16_t Shift) { + return shiftLeft(Shift); + } + PositiveFloat &operator>>=(int16_t Shift) { + return shiftRight(Shift); + } + +private: + PositiveFloat &shiftLeft(int32_t Shift); + PositiveFloat &shiftRight(int32_t Shift); + PositiveFloat normalizeExponents(PositiveFloat X); + +public: + + /// \brief Scale a large number accurately. + /// + /// Scale N (multiply it by this). Uses full precision multiplication, even + /// if Width is smaller than 64, so information is not lost. + uint64_t scale(uint64_t N) const; + uint64_t scaleByInverse(uint64_t N) const { + // TODO: implement directly, rather than relying on inverse. Inverse is + // expensive. + return inverse().scale(N); + } + int64_t scale(int64_t N) const { + std::pair Unsigned = splitSigned(N); + return joinSigned(scale(Unsigned.first), Unsigned.second); + } + int64_t scaleByInverse(int64_t N) const { + std::pair Unsigned = splitSigned(N); + return joinSigned(scaleByInverse(Unsigned.first), Unsigned.second); + } + + int compare(const PositiveFloat &X) const; + int compareTo(uint64_t N) const { + PositiveFloat Float = getFloat(N); + int Compare = compare(Float); + if (Width == 64 || Compare != 0) + return Compare; + + // Check for precision loss. We know *this == RoundTrip. + uint64_t RoundTrip = Float.template toInt(); + return N == RoundTrip ? 
0 : RoundTrip < N ? -1 : 1; + } + int compareTo(int64_t N) const { + return N < 0 ? 1 : compareTo(uint64_t(N)); + } + + PositiveFloat &invert() { + return *this = PositiveFloat::getFloat(1) / *this; + } + PositiveFloat inverse() const { + return PositiveFloat(*this).invert(); + } + +private: + static PositiveFloat getProduct(DigitsType L, DigitsType R); + static PositiveFloat getQuotient(DigitsType Dividend, DigitsType Divisor); + + std::pair lgImpl() const; + static int countLeadingZerosWidth(DigitsType Digits) { + if (Width == 64) + return countLeadingZeros64(Digits); + if (Width == 32) + return countLeadingZeros32(Digits); + return countLeadingZeros32(Digits) + Width - 32; + } + + static PositiveFloat adjustToWidth(uint64_t N, int S) { + assert(S >= MinExponent); + assert(S <= MaxExponent); + if (Width == 64 || N <= DigitsLimits::max()) + return PositiveFloat(N, S); + + // Shift right. + int Shift = 64 - Width - countLeadingZeros64(N); + DigitsType Shifted = N >> Shift; + + // Round. + assert(S + Shift <= MaxExponent); + return getRounded(PositiveFloat(Shifted, S + Shift), + N & UINT64_C(1) << (Shift - 1)); + } + + static PositiveFloat getRounded(PositiveFloat P, bool Round) { + if (!Round) + return P; + if (P.Digits == DigitsLimits::max()) + // Careful of overflow in the exponent. + return PositiveFloat(1, P.Exponent) <<= Width; + return PositiveFloat(P.Digits + 1, P.Exponent); + } +}; + +template +PositiveFloat operator+(const PositiveFloat &L, + const PositiveFloat &R) { + return PositiveFloat(L) += R; +} +template +PositiveFloat operator-(const PositiveFloat &L, + const PositiveFloat &R) { + return PositiveFloat(L) -= R; +} +template +PositiveFloat operator*(const PositiveFloat &L, + const PositiveFloat &R) { + return PositiveFloat(L) *= R; +} +template +PositiveFloat operator/(const PositiveFloat &L, + const PositiveFloat &R) { + return PositiveFloat(L) /= R; +} +template +PositiveFloat operator<<(const PositiveFloat &F, + int16_t Shift) { + return PositiveFloat(F) <<= Shift; +} +template +PositiveFloat operator>>(const PositiveFloat &F, + int16_t Shift) { + return PositiveFloat(F) >>= Shift; +} + +template +raw_ostream &operator<<(raw_ostream &OS, const PositiveFloat &X) { + return X.print(OS, 10); +} + +template +bool operator<(const PositiveFloat &L, uint64_t R) { + return L.compareTo(R) < 0; +} +template +bool operator>(const PositiveFloat &L, uint64_t R) { + return L.compareTo(R) > 0; +} +template +bool operator==(const PositiveFloat &L, uint64_t R) { + return L.compareTo(R) == 0; +} +template +bool operator!=(const PositiveFloat &L, uint64_t R) { + return L.compareTo(R) != 0; +} +template +bool operator<=(const PositiveFloat &L, uint64_t R) { + return L.compareTo(R) <= 0; +} +template +bool operator>=(const PositiveFloat &L, uint64_t R) { + return L.compareTo(R) >= 0; +} + +template +bool operator<(const PositiveFloat &L, int64_t R) { + return L.compareTo(R) < 0; +} +template +bool operator>(const PositiveFloat &L, int64_t R) { + return L.compareTo(R) > 0; +} +template +bool operator==(const PositiveFloat &L, int64_t R) { + return L.compareTo(R) == 0; +} +template +bool operator!=(const PositiveFloat &L, int64_t R) { + return L.compareTo(R) != 0; +} +template +bool operator<=(const PositiveFloat &L, int64_t R) { + return L.compareTo(R) <= 0; +} +template +bool operator>=(const PositiveFloat &L, int64_t R) { + return L.compareTo(R) >= 0; +} + +template +bool operator<(const PositiveFloat &L, uint32_t R) { + return L.compareTo(uint64_t(R)) < 0; +} +template +bool 
operator>(const PositiveFloat &L, uint32_t R) { + return L.compareTo(uint64_t(R)) > 0; +} +template +bool operator==(const PositiveFloat &L, uint32_t R) { + return L.compareTo(uint64_t(R)) == 0; +} +template +bool operator!=(const PositiveFloat &L, uint32_t R) { + return L.compareTo(uint64_t(R)) != 0; +} +template +bool operator<=(const PositiveFloat &L, uint32_t R) { + return L.compareTo(uint64_t(R)) <= 0; +} +template +bool operator>=(const PositiveFloat &L, uint32_t R) { + return L.compareTo(uint64_t(R)) >= 0; +} + +template +bool operator<(const PositiveFloat &L, int32_t R) { + return L.compareTo(int64_t(R)) < 0; +} +template +bool operator>(const PositiveFloat &L, int32_t R) { + return L.compareTo(int64_t(R)) > 0; +} +template +bool operator==(const PositiveFloat &L, int32_t R) { + return L.compareTo(int64_t(R)) == 0; +} +template +bool operator!=(const PositiveFloat &L, int32_t R) { + return L.compareTo(int64_t(R)) != 0; +} +template +bool operator<=(const PositiveFloat &L, int32_t R) { + return L.compareTo(int64_t(R)) <= 0; +} +template +bool operator>=(const PositiveFloat &L, int32_t R) { + return L.compareTo(int64_t(R)) >= 0; +} + +template +bool operator<(uint64_t L, const PositiveFloat &R) { return R > L; } +template +bool operator>(uint64_t L, const PositiveFloat &R) { return R < L; } +template +bool operator==(uint64_t L, const PositiveFloat &R) { return R == L; } +template +bool operator<=(uint64_t L, const PositiveFloat &R) { return R >= L; } +template +bool operator>=(uint64_t L, const PositiveFloat &R) { return R <= L; } +template +bool operator!=(uint64_t L, const PositiveFloat &R) { return R != L; } +template +bool operator<(int64_t L, const PositiveFloat &R) { return R > L; } +template +bool operator>(int64_t L, const PositiveFloat &R) { return R < L; } +template +bool operator==(int64_t L, const PositiveFloat &R) { return R == L; } +template +bool operator<=(int64_t L, const PositiveFloat &R) { return R >= L; } +template +bool operator>=(int64_t L, const PositiveFloat &R) { return R <= L; } +template +bool operator!=(int64_t L, const PositiveFloat &R) { return R != L; } +template +bool operator<(uint32_t L, const PositiveFloat &R) { return R > L; } +template +bool operator>(uint32_t L, const PositiveFloat &R) { return R < L; } +template +bool operator==(uint32_t L, const PositiveFloat &R) { return R == L; } +template +bool operator<=(uint32_t L, const PositiveFloat &R) { return R >= L; } +template +bool operator>=(uint32_t L, const PositiveFloat &R) { return R <= L; } +template +bool operator!=(uint32_t L, const PositiveFloat &R) { return R != L; } +template +bool operator<(int32_t L, const PositiveFloat &R) { return R > L; } +template +bool operator>(int32_t L, const PositiveFloat &R) { return R < L; } +template +bool operator==(int32_t L, const PositiveFloat &R) { return R == L; } +template +bool operator<=(int32_t L, const PositiveFloat &R) { return R >= L; } +template +bool operator>=(int32_t L, const PositiveFloat &R) { return R <= L; } +template +bool operator!=(int32_t L, const PositiveFloat &R) { return R != L; } + +template +uint64_t PositiveFloat::scale(uint64_t N) const { + if (Width == 64 || N <= DigitsLimits::max()) + return (getFloat(N) * *this).template toInt(); + std::pair Lg = lgImpl(); + if (extractLgFloor(Lg) >= 64) + return UINT64_MAX; + if (extractLgCeiling(Lg) <= -64) + return 0; + + uint64_t Result = 0; + for (int Bit = 0; Bit < 64; Bit += Width) { + PositiveFloat Digit = getFloat(N & DigitsLimits::max() << Bit); + Digit *= *this; + + uint64_t 
Sum = Result + (Digit.toInt() >> Bit); + if (Sum < Result) + return UINT64_MAX; + Result = Sum; + } + return Result; +} + +template +PositiveFloat +PositiveFloat::getProduct(DigitsType L, DigitsType R) { + // Check for zero. + if (!L || !R) + return getZero(); + + // Check for numbers that we can compute with 64-bit math. + if (Width <= 32) + return adjustToWidth(uint64_t(L) * uint64_t(R), 0); + + // Do the full thing. + return PositiveFloat(multiply64(L, R)); +} +template +PositiveFloat +PositiveFloat::getQuotient(DigitsType Dividend, DigitsType Divisor) { + // Check for zero. + if (!Dividend) + return getZero(); + if (!Divisor) + return getLargest(); + + if (Width == 64) + return PositiveFloat(divide64(Dividend, Divisor)); + + // We can compute this with 64-bit math. + int Shift = countLeadingZeros64(Dividend); + uint64_t Shifted = uint64_t(Dividend) << Shift; + uint64_t Quotient = Shifted / Divisor; + + // If Quotient needs to be shifted, then adjustToWidth will round. + if (Quotient > DigitsLimits::max()) + return adjustToWidth(Quotient, -Shift); + + // Round based on the value of the next bit. + return getRounded(PositiveFloat(Quotient, -Shift), + Shifted % Divisor >= getHalf(Divisor)); +} + +template +template +IntT PositiveFloat::toInt() const { + typedef std::numeric_limits Limits; + if (*this < 1) + return 0; + if (*this >= Limits::max()) + return Limits::max(); + + IntT N = Digits; + if (Exponent > 0) { + assert(size_t(Exponent) < sizeof(IntT)*8); + return N << Exponent; + } + if (Exponent < 0) { + assert(size_t(-Exponent) < sizeof(IntT)*8); + return N >> -Exponent; + } + return N; +} + +template +std::pair PositiveFloat::lgImpl() const { + if (isZero()) + return std::make_pair(INT32_MIN, 0); + + // Get the floor of the lg of Digits. + int32_t LocalFloor = Width - countLeadingZerosWidth(Digits) - 1; + + // Get the floor of the lg of this. + int32_t Floor = Exponent + LocalFloor; + if (Digits == UINT64_C(1) << LocalFloor) + return std::make_pair(Floor, 0); + + // Round based on the next digit. + bool Round = Digits & UINT64_C(1) << (LocalFloor - 1); + return std::make_pair(Floor + Round, Round ? 1 : -1); +} + +template +PositiveFloat +PositiveFloat::normalizeExponents(PositiveFloat X) { + if (isZero() || X.isZero()) + return X; + + if (Exponent > X.Exponent) { + // Reverse the arguments. + *this = X.normalizeExponents(*this); + return X; + } + + if (Exponent == X.Exponent) + return X; + + int ExponentDiff = getDiff(Exponent, X.Exponent); + if (ExponentDiff >= 2*Width) { + *this = getZero(); + return X; + } + + // Use up any leading zeros on X, and then shift this. + int ShiftX = std::min(countLeadingZerosWidth(X.Digits), ExponentDiff); + int ShiftThis = ExponentDiff - ShiftX; + + if (ShiftThis >= Width) { + *this = getZero(); + return X; + } + + X.Digits <<= ShiftX; + X.Exponent -= ShiftX; + Digits >>= ShiftThis; + Exponent += ShiftThis; + return X; +} + +template +PositiveFloat & +PositiveFloat::operator+=(const PositiveFloat &X) { + if (isLargest() || X.isZero()) + return *this; + if (isZero() || X.isLargest()) + return *this = X; + + // Normalize exponents. + PositiveFloat Scaled = normalizeExponents(X); + + // Check for zero again. + if (isZero()) + return *this = Scaled; + if (Scaled.isZero()) + return *this; + + // Compute sum. 
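+  // An unsigned sum wraps exactly when it is smaller than either operand, so
+  // that is the overflow test. On overflow, renormalize: drop the low bit,
+  // restore the carried-out top bit, and bump the exponent, saturating to
+  // getLargest() if the exponent is already at MaxExponent.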
+  DigitsType Sum = Digits + Scaled.Digits;
+  bool DidOverflow = Sum < Digits || Sum < Scaled.Digits;
+  Digits = Sum;
+  if (!DidOverflow)
+    return *this;
+
+  if (Exponent == MaxExponent)
+    return *this = getLargest();
+
+  ++Exponent;
+  Digits = Digits >> 1 | UINT64_C(1) << (Width - 1);
+
+  return *this;
+}
+template <class DigitsT>
+PositiveFloat<DigitsT> &
+PositiveFloat<DigitsT>::operator-=(const PositiveFloat &X) {
+  if (X.isZero())
+    return *this;
+  if (*this <= X)
+    return *this = getZero();
+
+  // Normalize exponents.
+  PositiveFloat Scaled = normalizeExponents(X);
+  assert(Digits >= Scaled.Digits);
+
+  // Compute difference.
+  if (!Scaled.isZero()) {
+    Digits -= Scaled.Digits;
+    return *this;
+  }
+
+  // Check if X just barely lost its last bit.  E.g., for 32-bit:
+  //
+  //   1*2^32 - 1*2^0 == 0xffffffff != 1*2^32
+  if (*this == PositiveFloat(1, X.lgFloor() + Width)) {
+    Digits = DigitsType(0) - 1;
+    --Exponent;
+  }
+  return *this;
+}
+template <class DigitsT>
+PositiveFloat<DigitsT> &
+PositiveFloat<DigitsT>::operator*=(const PositiveFloat &X) {
+  if (isZero())
+    return *this;
+  if (X.isZero())
+    return *this = X;
+
+  // Save the exponents.
+  int32_t Exponents = int32_t(Exponent) + int32_t(X.Exponent);
+
+  // Get the raw product.
+  *this = getProduct(Digits, X.Digits);
+
+  // Combine with exponents.
+  return *this <<= Exponents;
+}
+template <class DigitsT>
+PositiveFloat<DigitsT> &
+PositiveFloat<DigitsT>::operator/=(const PositiveFloat &X) {
+  if (isZero())
+    return *this;
+  if (X.isZero())
+    return *this = getLargest();
+
+  // Save the exponents.
+  int32_t Exponents = int32_t(Exponent) - int32_t(X.Exponent);
+
+  // Get the raw quotient.
+  *this = getQuotient(Digits, X.Digits);
+
+  // Combine with exponents.
+  return *this <<= Exponents;
+}
+template <class DigitsT>
+PositiveFloat<DigitsT> &
+PositiveFloat<DigitsT>::shiftLeft(int32_t Shift) {
+  if (Shift < 0)
+    return shiftRight(-Shift);
+  if (!Shift || isZero())
+    return *this;
+
+  // Shift as much as we can in the exponent.
+  int16_t ExponentShift = std::min(Shift, MaxExponent - Exponent);
+  Exponent += ExponentShift;
+  if (ExponentShift == Shift)
+    return *this;
+
+  // Check this late, since it's rare.
+  if (isLargest())
+    return *this;
+
+  // Shift as far as possible.
+  int32_t RawShift = std::min(Shift, countLeadingZerosWidth(Digits));
+  if (RawShift + ExponentShift < Shift)
+    // Saturate.
+    return *this = getLargest();
+
+  // Only shift the digits by what the exponent couldn't absorb.
+  Digits <<= Shift - ExponentShift;
+  return *this;
+}
+
+template <class DigitsT>
+PositiveFloat<DigitsT> &
+PositiveFloat<DigitsT>::shiftRight(int32_t Shift) {
+  if (Shift < 0)
+    return shiftLeft(-Shift);
+  if (!Shift || isZero())
+    return *this;
+
+  // Shift as much as we can in the exponent.
+  int16_t ExponentShift = std::min(Shift, Exponent - MinExponent);
+  Exponent -= ExponentShift;
+  if (ExponentShift == Shift)
+    return *this;
+
+  // Shift as far as possible.
+  int32_t RawShift = Shift - ExponentShift;
+  if (RawShift >= Width)
+    // Saturate.
+    return *this = getZero();
+
+  // May result in zero.  Only shift by what the exponent couldn't absorb.
+  Digits >>= RawShift;
+  return *this;
+}
+
+template <class DigitsT>
+int PositiveFloat<DigitsT>::compare(const PositiveFloat &X) const {
+  // Check for zero.
+  if (isZero())
+    return X.isZero() ? 0 : -1;
+  if (X.isZero())
+    return 1;
+
+  // Check for the scale.  Use lgFloor to be sure that the exponent difference
+  // is always less than 64.
+  int32_t lgL = lgFloor(), lgR = X.lgFloor();
+  if (lgL != lgR)
+    return lgL < lgR ? -1 : 1;
+
+  // Compare digits.
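+  // The lg floors match, so the exponent difference is small enough for the
+  // base-class helper to shift one operand's digits into the other's scale
+  // without losing the comparison.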
+  if (Exponent < X.Exponent)
+    return PositiveFloatBase::compare(Digits, X.Digits, X.Exponent - Exponent);
+
+  return -PositiveFloatBase::compare(X.Digits, Digits, Exponent - X.Exponent);
+}
+
+template <class DigitsT>
+struct isPodLike<PositiveFloat<DigitsT>> {
+  static const bool value = true;
+};
+
+}
+
+#endif
Index: lib/Analysis/BlockFrequencyInfo.cpp
===================================================================
--- lib/Analysis/BlockFrequencyInfo.cpp
+++ lib/Analysis/BlockFrequencyInfo.cpp
@@ -1,4 +1,4 @@
-//=======-------- BlockFrequencyInfo.cpp - Block Frequency Analysis -------===//
+//===- BlockFrequencyInfo.cpp - Block Frequency Analysis ------------------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -11,8 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#define DEBUG_TYPE "block-freq"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
-#include "llvm/Analysis/BlockFrequencyImpl.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/Passes.h"
@@ -160,7 +161,7 @@
 }
 
 const Function *BlockFrequencyInfo::getFunction() const {
-  return BFI ? BFI->Fn : nullptr;
+  return BFI ? BFI->getFunction() : nullptr;
 }
 
 raw_ostream &BlockFrequencyInfo::
Index: lib/Analysis/BlockFrequencyInfoImpl.cpp
===================================================================
--- /dev/null
+++ lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -0,0 +1,891 @@
+//===- BlockFrequencyInfoImpl.cpp - Block Frequency Info Implementation --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "block-freq"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+
+typedef BlockFrequencyInfoImplBase::BlockNode BlockNode;
+typedef BlockFrequencyInfoImplBase::Distribution Distribution;
+typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList;
+typedef BlockFrequencyInfoImplBase::Float Float;
+typedef BlockFrequencyInfoImplBase::PackagedLoopData PackagedLoopData;
+typedef BlockFrequencyInfoImplBase::Weight Weight;
+typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData;
+
+/// \brief Stack entry describing a loop.
+struct LoopStackEntry {
+  BlockNode LoopHead;
+  BlockNode LatestBackedge;
+};
+
+/// \brief Stack describing currently open loops.
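+///
+/// Entries are pushed in reverse post-order and a loop stays open until the
+/// traversal passes its latest backedge, so the top of the stack is always
+/// the innermost open loop.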
+struct LoopStack { + std::vector OpenLoops; + + void push(const BlockNode &LoopHead, const BlockNode &LatestBackedge) { + assert(LoopHead.isValid()); + assert(LatestBackedge.isValid()); + OpenLoops.push_back({LoopHead, LatestBackedge}); + } + void pop(const BlockNode &FinishedNode) { + while (!empty() && top().LatestBackedge <= FinishedNode) + OpenLoops.pop_back(); + } + bool empty() const { return OpenLoops.empty(); } + const LoopStackEntry &top() const { + assert(!OpenLoops.empty()); + return OpenLoops.back(); + } + void adjustAfterFinishing(const BlockNode &Current, + const BlockNode &LatestBackedge) { + pop(Current); + if (LatestBackedge.isValid() && LatestBackedge > Current) + push(Current, LatestBackedge); + } +}; + +/// \brief Dithering mass distributer. +/// +/// This class splits up a single mass into portions by weight, dithering to +/// spread out error. No mass is lost. The dithering precision depends on the +/// precision of the product of \a BlockMass and \a BranchProbability. +/// +/// The distribution algorithm follows. +/// +/// 1. Initialize by saving the sum of the weights in \a RemWeight and the +/// mass to distribute in \a RemMass. +/// +/// 2. For each portion: +/// +/// 1. Construct a branch probability, P, as the portion's weight divided +/// by the current value of \a RemWeight. +/// 2. Calculate the portion's mass as \a RemMass times P. +/// 3. Update \a RemWeight and \a RemMass at each portion by subtracting +/// the current portion's weight and mass. +/// +/// Mass is distributed in two ways: full distribution and forward +/// distribution. The latter ignores backedges, and uses the parallel fields +/// \a RemForwardWeight and \a RemForwardMass. +struct DitheringDistributer { + uint32_t RemWeight; + uint32_t RemForwardWeight; + + BlockMass RemMass; + BlockMass RemForwardMass; + + DitheringDistributer(Distribution &Dist, const BlockMass &Mass); + + BlockMass takeLocalMass(uint32_t Weight) { + (void)takeMass(Weight); + return takeForwardMass(Weight); + } + BlockMass takeExitMass(uint32_t Weight) { + (void)takeForwardMass(Weight); + return takeMass(Weight); + } + BlockMass takeBackedgeMass(uint32_t Weight) { + return takeMass(Weight); + } + +private: + BlockMass takeForwardMass(uint32_t Weight); + BlockMass takeMass(uint32_t Weight); +}; + +} + +DitheringDistributer::DitheringDistributer(Distribution &Dist, + const BlockMass &Mass) { + Dist.normalize(); + RemWeight = Dist.Total; + RemForwardWeight = Dist.ForwardTotal; + RemMass = Mass; + RemForwardMass = Dist.ForwardTotal ? Mass : BlockMass(); +} + +BlockMass DitheringDistributer::takeForwardMass(uint32_t Weight) { + // Compute the amount of mass to take. + assert(Weight && "invalid weight"); + assert(Weight <= RemForwardWeight); + BlockMass Mass = RemForwardMass * BranchProbability(Weight, RemForwardWeight); + + // Decrement totals (dither). + RemForwardWeight -= Weight; + RemForwardMass -= Mass; + return Mass; +} +BlockMass DitheringDistributer::takeMass(uint32_t Weight) { + assert(Weight && "invalid weight"); + assert(Weight <= RemWeight); + BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight); + + // Decrement totals (dither). + RemWeight -= Weight; + RemMass -= Mass; + return Mass; +} + +void Distribution::add(const BlockNode &Node, uint64_t Amount, + Weight::DistType Type) { + assert(Amount && "invalid weight of 0"); + uint64_t NewTotal = Total + Amount; + + // Check for overflow. It should be impossible to overflow twice. 
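+  // An unsigned add wraps iff the result is smaller than an operand, so this
+  // comparison detects 64-bit overflow.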
+ bool IsOverflow = NewTotal < Total; + assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow"); + DidOverflow |= IsOverflow; + + // Update the total. + Total = NewTotal; + + // Save the weight. + Weight W; + W.TargetNode = Node; + W.Amount = Amount; + W.Type = Type; + Weights.push_back(W); + + if (Type == Weight::Backedge) + return; + + // Update forward total. Don't worry about overflow here, since then Total + // will exceed 32-bits and they'll both be recomputed in normalize(). + ForwardTotal += Amount; +} + +static void combineWeight(Weight &W, const Weight &OtherW) { + assert(OtherW.TargetNode.isValid()); + if (!W.Amount) { + W = OtherW; + return; + } + assert(W.Type == OtherW.Type); + assert(W.TargetNode == OtherW.TargetNode); + assert(W.Amount < W.Amount + OtherW.Amount); + W.Amount += OtherW.Amount; +} +static void combineWeightsBySorting(WeightList &Weights) { + // Sort so edges to the same node are adjacent. + std::sort(Weights.begin(), Weights.end(), + [](const Weight &L, const Weight &R) { + return L.TargetNode < R.TargetNode; + }); + + // Combine adjacent edges. + WeightList::iterator O = Weights.begin(); + for (WeightList::const_iterator I = O, L = O, E = Weights.end(); + I != E; ++O, (I = L)) { + *O = *I; + + // Find the adjacent weights to the same node. + for (++L; L != E && I->TargetNode == L->TargetNode; ++L) + combineWeight(*O, *L); + } + + // Erase extra entries. + Weights.erase(O, Weights.end()); + return; +} +static void combineWeightsByHashing(WeightList &Weights) { + // Collect weights into a DenseMap. + typedef DenseMap HashTable; + HashTable Combined(NextPowerOf2(2 * Weights.size())); + for (const Weight &W : Weights) + combineWeight(Combined[W.TargetNode.Index], W); + + // Check whether anything changed. + if (Weights.size() == Combined.size()) + return; + + // Fill in the new weights. + Weights.clear(); + Weights.reserve(Combined.size()); + for (const auto &I : Combined) + Weights.push_back(I.second); +} +static void combineWeights(WeightList &Weights) { + // Use a hash table for many successors to keep this linear. + if (Weights.size() > 128) { + combineWeightsByHashing(Weights); + return; + } + + combineWeightsBySorting(Weights); +} +static uint64_t shiftRightAndRound(uint64_t N, int Shift) { + assert(Shift >= 0); + assert(Shift < 64); + if (!Shift) + return N; + return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1)); +} +void Distribution::normalize() { + // Early exit for termination nodes. + if (Weights.empty()) + return; + + // Only bother if there are multiple successors. + if (Weights.size() > 1) + combineWeights(Weights); + + // Early exit when combined into a single successor. + if (Weights.size() == 1) { + Total = 1; + ForwardTotal = Weights.front().Type != Weight::Backedge; + Weights.front().Amount = 1; + return; + } + + // Determine how much to shift right so that the total fits into 32-bits. + // + // If we shift at all, shift by 1 extra. Otherwise, the lower limit of 1 + // for each weight can cause a 32-bit overflow. + int Shift = 0; + if (DidOverflow) + Shift = 33; + else if (Total > UINT32_MAX) + Shift = 33 - countLeadingZeros(Total); + + // Early exit if nothing needs to be scaled. + if (!Shift) + return; + + // Recompute the total through accumulation (rather than shifting it) so that + // it's accurate after shifting. ForwardTotal is dirty here anyway. + Total = 0; + ForwardTotal = 0; + + // Sum the weights to each node and shift right if necessary. + for (Weight &W : Weights) { + // Scale down below UINT32_MAX. 
Since Shift is larger than necessary, we
+    // can round here without concern about overflow.
+    assert(W.TargetNode.isValid());
+    W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
+    assert(W.Amount <= UINT32_MAX);
+
+    // Update the total.
+    Total += W.Amount;
+    if (W.Type == Weight::Backedge)
+      continue;
+
+    // Update the forward total.
+    ForwardTotal += W.Amount;
+  }
+  assert(Total <= UINT32_MAX);
+}
+
+void BlockFrequencyInfoImplBase::clear() {
+  *this = BlockFrequencyInfoImplBase();
+}
+
+/// \brief Clear all memory not needed downstream.
+///
+/// Releases all memory not used downstream.  In particular, saves Freqs.
+static void cleanup(BlockFrequencyInfoImplBase &BFI) {
+  std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
+  BFI.clear();
+  BFI.Freqs = std::move(SavedFreqs);
+}
+
+std::string BlockFrequencyInfoImplBase::getBlockName(const BasicBlock *BB) {
+  return BB->getName().str();
+}
+
+std::string BlockFrequencyInfoImplBase::
+getBlockName(const MachineBasicBlock *MBB) {
+  std::string str;
+  raw_string_ostream ss(str);
+  ss << "BB#" << MBB->getNumber();
+
+  if (const BasicBlock *BB = MBB->getBasicBlock())
+    ss << " derived from LLVM BB " << BB->getName();
+
+  return ss.str();
+}
+
+/// \brief Get a possibly packaged node.
+///
+/// Get the node currently representing Node, which could be a containing
+/// loop.
+///
+/// This function should only be called when distributing mass.  As long as
+/// there are no irreducible edges to Node, it has complexity O(1) in this
+/// context.
+///
+/// In general, the complexity is O(L), where L is the number of loop headers
+/// Node has been packaged into.  Since this method is called in the context
+/// of distributing mass, L will be the number of loop headers an early exit
+/// edge jumps out of.
+static BlockNode getPackagedNode(const BlockFrequencyInfoImplBase &BFI,
+                                 const BlockNode &Node) {
+  assert(Node.isValid());
+  if (!BFI.Working[Node.Index].IsPackaged)
+    return Node;
+  if (!BFI.Working[Node.Index].ContainingLoop.isValid())
+    return Node;
+  return getPackagedNode(BFI, BFI.Working[Node.Index].ContainingLoop);
+}
+
+/// \brief Get the appropriate mass for a possible pseudo-node loop package.
+///
+/// Get the appropriate mass for Node.  If Node is a loop header (whose loop
+/// has been packaged), returns the mass of its pseudo-node.  If it's a node
+/// inside a packaged loop, it returns the mass of the loop's pseudo-node.
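+///
+/// Note that mass is never assigned to a node that has itself been packaged
+/// away; the assertion below guards against that.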
+static BlockMass &getPackageMass(BlockFrequencyInfoImplBase &BFI, + const BlockNode &Node) { + assert(Node.isValid()); + assert(!BFI.Working[Node.Index].IsPackaged); + if (!BFI.Working[Node.Index].IsAPackage) + return BFI.Working[Node.Index].Mass; + + return BFI.getLoopPackage(Node).Mass; +} + +void BlockFrequencyInfoImplBase:: +addToDist(Distribution &Dist, const BlockNode &LoopHead, const BlockNode &Succ, + uint64_t Weight) { + if (!Weight) + Weight = 1; + +#ifndef NDEBUG + auto debugSuccessor = [&](const char *Type, const BlockNode &Resolved) { + dbgs() << " =>" << " [" << Type << "] weight = " << Weight; + if (Succ != LoopHead) + dbgs() << ", succ = " << getBlockName(Succ); + if (Resolved != Succ) + dbgs() << ", resolved = " << getBlockName(Resolved); + dbgs() << "\n"; + }; + (void)debugSuccessor; +#endif + + if (Succ == LoopHead) { + DEBUG(debugSuccessor("backedge", Succ)); + Dist.addBackedge(LoopHead, Weight); + return; + } + BlockNode Resolved = getPackagedNode(*this, Succ); + assert(Resolved != LoopHead); + + if (Working[Resolved.Index].ContainingLoop == LoopHead) { + DEBUG(debugSuccessor(" local ", Resolved)); + Dist.addLocal(Resolved, Weight); + return; + } + DEBUG(debugSuccessor(" exit ", Resolved)); + Dist.addExit(Resolved, Weight); +} + +void BlockFrequencyInfoImplBase:: +addLoopSuccessorsToDist(const BlockNode &LoopHead, + const BlockNode &LocalLoopHead, + Distribution &Dist) { + PackagedLoopData &LoopPackage = getLoopPackage(LocalLoopHead); + const PackagedLoopData::ExitMap &Exits = LoopPackage.Exits; + + // Copy the exit map into Dist. + for (const auto &I : Exits) + addToDist(Dist, LoopHead, I.first, I.second.getMass()); + + // We don't need this map any more. Clear it to prevent quadratic memory + // usage in deeply nested loops with irreducible control flow. + LoopPackage.Exits.clear(); +} + +/// \brief Get the maximum allowed loop scale. +/// +/// Gives the maximum number of estimated iterations allowed for a loop. +/// Downstream users have trouble with very large numbers (even within +/// 64-bits). Perhaps they can be changed to use PositiveFloat. +/// +/// TODO: change downstream users so that this can be increased or removed. +static Float getMaxLoopScale() { return Float(1, 12); } + +/// \brief Compute the loop scale for a loop. +static void computeLoopScale(BlockFrequencyInfoImplBase &BFI, + const BlockNode &LoopHead) { + // Compute loop scale. + DEBUG(dbgs() << "compute-loop-scale: " << BFI.getBlockName(LoopHead) << "\n"); + + // LoopScale == 1 / ExitMass + // ExitMass == HeadMass - BackedgeMass + PackagedLoopData &LoopPackage = BFI.getLoopPackage(LoopHead); + BlockMass ExitMass = BlockMass::getFull() - LoopPackage.BackedgeMass; + + // Block scale stores the inverse of the scale. + LoopPackage.Scale = ExitMass.toFloat().inverse(); + + DEBUG(dbgs() + << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull() + << " - " << LoopPackage.BackedgeMass << ")\n" + << " - scale = " << LoopPackage.Scale << "\n"); + + if (LoopPackage.Scale > getMaxLoopScale()) { + LoopPackage.Scale = getMaxLoopScale(); + DEBUG(dbgs() << " - reduced-to-max-scale: " << getMaxLoopScale() << "\n"); + } +} + +/// \brief Package up a loop. 
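+///
+/// Marks the loop head as a package, a pseudo-node that represents the
+/// entire loop to anything outside of it, and marks each member as packaged
+/// so that edges into the loop resolve to the pseudo-node.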
+static void packageLoop(BlockFrequencyInfoImplBase &BFI, + const BlockNode &LoopHead) { + DEBUG(dbgs() << "packaging-loop: " << BFI.getBlockName(LoopHead) << "\n"); + BFI.Working[LoopHead.Index].IsAPackage = true; + for (const BlockNode &M : BFI.getLoopPackage(LoopHead).Members) { + DEBUG(dbgs() << " - node: " << BFI.getBlockName(M.Index) << "\n"); + BFI.Working[M.Index].IsPackaged = true; + } +} + +void BlockFrequencyInfoImplBase:: +distributeMass(const BlockNode &Source, const BlockNode &LoopHead, + Distribution &Dist) { + BlockMass Mass = getPackageMass(*this, Source); + DEBUG(dbgs() << " => mass: " << Mass + << " ( general | forward )\n"); + + // Distribute mass to successors as laid out in Dist. + DitheringDistributer D(Dist, Mass); + +#ifndef NDEBUG + auto debugAssign = [&](const BlockNode &T, const BlockMass &M, + const char *Desc) { + dbgs() << " => assign " << M << " (" << D.RemMass << "|" + << D.RemForwardMass << ")"; + if (Desc) + dbgs() << " [" << Desc << "]"; + if (T.isValid()) + dbgs() << " to " << getBlockName(T); + dbgs() << "\n"; + }; + (void)debugAssign; +#endif + + PackagedLoopData *LoopPackage = 0; + if (LoopHead.isValid()) + LoopPackage = &getLoopPackage(LoopHead); + for (const Weight &W : Dist.Weights) { + // Check for a local edge (forward and non-exit). + if (W.Type == Weight::Local) { + BlockMass Local = D.takeLocalMass(W.Amount); + getPackageMass(*this, W.TargetNode) += Local; + DEBUG(debugAssign(W.TargetNode, Local, nullptr)); + continue; + } + + // Backedges and exits only make sense if we're processing a loop. + assert(LoopPackage && "backedge or exit outside of loop"); + + // Check for a backedge. + if (W.Type == Weight::Backedge) { + BlockMass Back = D.takeBackedgeMass(W.Amount); + LoopPackage->BackedgeMass += Back; + DEBUG(debugAssign(BlockNode(), Back, "back")); + continue; + } + + // This must be an exit. + assert(W.Type == Weight::Exit); + BlockMass Exit = D.takeExitMass(W.Amount); + LoopPackage->Exits.push_back(std::make_pair(W.TargetNode, Exit)); + DEBUG(debugAssign(W.TargetNode, Exit, "exit")); + } +} + +static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI, + const Float &Min, const Float &Max) { + // Scale the Factor to a size that creates integers. Ideally, integers would + // be scaled so that Max == UINT64_MAX so that they can be best + // differentiated. However, the register allocator currently deals poorly + // with large numbers. Instead, push Min up a little from 1 to give some + // room to differentiate small, unequal numbers. + // + // TODO: fix issues downstream so that ScalingFactor can be Float(1,64)/Max. + Float ScalingFactor = Min.inverse(); + if ((Max / Min).lg() < 60) + ScalingFactor <<= 3; + + // Translate the floats to integers. 
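+  //
+  // Since downstream users cannot handle frequencies of 0, each scaled
+  // frequency is clamped to a minimum of 1 below.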
+ DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max + << ", factor = " << ScalingFactor << "\n"); + for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) { + Float Scaled = BFI.Freqs[Index].Floating * ScalingFactor; + BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt()); + DEBUG(dbgs() << " - " << BFI.getBlockName(Index) + << ": float = " << BFI.Freqs[Index].Floating + << ", scaled = " << Scaled + << ", int = " << BFI.Freqs[Index].Integer << "\n"); + } +} + +static void scaleBlockData(BlockFrequencyInfoImplBase &BFI, + const BlockNode &Node, const PackagedLoopData &Loop) { + Float F = Loop.Mass.toFloat() * Loop.Scale; + + Float &Current = BFI.Freqs[Node.Index].Floating; + Float Updated = Current * F; + + DEBUG(dbgs() << " - " << BFI.getBlockName(Node) << ": " + << Current << " => " << Updated << "\n"); + + Current = Updated; +} + +/// \brief Unwrap a loop package. +/// +/// Visits all the members of a loop, adjusting their BlockData according to +/// the loop's pseudo-node. +static void unwrapLoopPackage(BlockFrequencyInfoImplBase &BFI, + const BlockNode &Head) { + assert(Head.isValid()); + + PackagedLoopData &LoopPackage = BFI.getLoopPackage(Head); + DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getBlockName(Head) + << ": mass = " << LoopPackage.Mass + << ", scale = " << LoopPackage.Scale << "\n"); + scaleBlockData(BFI, Head, LoopPackage); + + // Propagate the head scale through the loop. Since members are visited in + // RPO, the head scale will be updated by the loop scale first, and then the + // final head scale will be used for updated the rest of the members. + for (const BlockNode &M : LoopPackage.Members) { + const FrequencyData &HeadData = BFI.Freqs[Head.Index]; + FrequencyData &Freqs = BFI.Freqs[M.Index]; + Float NewFreq = Freqs.Floating * HeadData.Floating; + DEBUG(dbgs() << " - " << BFI.getBlockName(M) << ": " + << Freqs.Floating << " => " << NewFreq << "\n"); + Freqs.Floating = NewFreq; + } +} + +void BlockFrequencyInfoImplBase::finalizeMetrics() { + // Set initial frequencies from loop-local masses. + for (size_t Index = 0; Index < Working.size(); ++Index) + Freqs[Index].Floating = Working[Index].Mass.toFloat(); + + // Unwrap loop packages in reverse post-order, tracking min and max + // frequencies. + auto Min = Float::getLargest(); + auto Max = Float::getZero(); + for (size_t Index = 0; Index < Working.size(); ++Index) { + if (Working[Index].isLoopHeader()) + unwrapLoopPackage(*this, BlockNode(Index)); + + // Update max scale. + Min = std::min(Min, Freqs[Index].Floating); + Max = std::max(Max, Freqs[Index].Floating); + } + + // Convert to integers. + convertFloatingToInteger(*this, Min, Max); + + // Clean up data structures. + cleanup(*this); + + // Print out the final stats. 
+  DEBUG(dump());
+}
+
+static size_t numberLoops(BlockFrequencyInfoImplBase &BFI) {
+  size_t NumLoops = 0;
+  for (size_t Index = 0; Index < BFI.Working.size(); ++Index) {
+    auto &Working = BFI.Working[Index];
+    if (Working.isLoopHeader()) {
+      Working.LoopIndex = NumLoops++;
+      DEBUG(dbgs() << " - loop = " << BFI.getBlockName(Index)
+                   << ", backedge = "
+                   << BFI.getBlockName(Working.LatestBackedge) << "\n");
+    }
+  }
+  return NumLoops;
+}
+static void addMembersToLoops(BlockFrequencyInfoImplBase &BFI) {
+  for (size_t Index = 0; Index < BFI.Working.size(); ++Index) {
+    auto &Working = BFI.Working[Index];
+    if (Working.hasLoopHeader()) {
+      auto &Package = BFI.getLoopPackage(Working.ContainingLoop);
+      Package.Members.push_back(Index);
+      DEBUG(dbgs() << " - loop = " << BFI.getBlockName(Working.ContainingLoop)
+                   << ", member = " << BFI.getBlockName(Index) << "\n");
+    }
+  }
+}
+void BlockFrequencyInfoImplBase::initializeLoopPackages() {
+  size_t NumLoops = numberLoops(*this);
+  if (!NumLoops)
+    return;
+  PackagedLoops.assign(NumLoops, PackagedLoopData());
+  addMembersToLoops(*this);
+}
+
+BlockFrequency BlockFrequencyInfoImplBase::
+getBlockFreq(const BlockNode &Node) const {
+  if (!Node.isValid())
+    return 0;
+  return Freqs[Node.Index].Integer;
+}
+Float
+BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
+  if (!Node.isValid())
+    return Float::getZero();
+  return Freqs[Node.Index].Floating;
+}
+
+std::string BlockFrequencyInfoImplBase::
+getBlockName(const BlockNode &Node) const {
+  return std::string();
+}
+
+raw_ostream &BlockFrequencyInfoImplBase::
+printBlockFreq(raw_ostream &OS, const BlockNode &Node) const {
+  return OS << getFloatingBlockFreq(Node);
+}
+
+raw_ostream &BlockFrequencyInfoImplBase::
+printBlockFreq(raw_ostream &OS, const BlockFrequency &Freq) const {
+  Float Block(Freq.getFrequency(), 0);
+  Float Entry(getEntryFreq(), 0);
+
+  return OS << Block / Entry;
+}
+
+template <class BlockT, class FunctionT, class BlockProbInfoT>
+void BlockFrequencyInfoImpl<BlockT, FunctionT, BlockProbInfoT>::
+doFunction(const FunctionT *F, const BlockProbInfoT *BPI) {
+  // Save the parameters.
+  this->BPI = BPI;
+  this->F = F;
+
+  // Clean up left-over data structures.
+  BlockFrequencyInfoImplBase::clear();
+  RPOT.clear();
+  Nodes.clear();
+
+  // Initialize.
+  DEBUG(dbgs()
+        << "\nblock-frequency: " << F->getName()
+        << "\n=================" << std::string(F->getName().size(), '=')
+        << "\n");
+  initializeRPOT(F);
+  initializeLoops();
+
+  // Visit loops in post-order to find the local mass distribution, and then
+  // do the full function.
+  computeMassInLoops();
+  computeMassInFunction();
+  finalizeMetrics();
+}
+
+template <class BlockT, class FunctionT, class BlockProbInfoT>
+void BlockFrequencyInfoImpl<BlockT, FunctionT, BlockProbInfoT>::
+initializeRPOT(const FunctionT *F) {
+  const BlockT *Entry = F->begin();
+  RPOT.reserve(F->size());
+  std::copy(po_begin(Entry), po_end(Entry), std::back_inserter(RPOT));
+  std::reverse(RPOT.begin(), RPOT.end());
+
+  assert(RPOT.size() - 1 <= BlockNode::getMaxIndex() &&
+         "More nodes in function than Block Frequency Info supports");
+
+  DEBUG(dbgs() << "reverse-post-order-traversal\n");
+  for (rpot_iterator I = rpot_begin(), E = rpot_end(); I != E; ++I) {
+    DEBUG(dbgs() << " - " << getIndex(I) << ": " << getBlockName(*I) << "\n");
+    Nodes[*I] = getNode(I);
+  }
+
+  Working.resize(RPOT.size());
+  Freqs.resize(RPOT.size());
+}
+
+static void updateNodeToMax(BlockNode &Node, const BlockNode &New) {
+  if (!New.isValid())
+    return;
+  if (!Node.isValid() || Node < New)
+    Node = New;
+}
+
+/// \brief Register an edge.
+///
+/// Determines the loop relationships of Current and Pred.
Called by +/// initializeLoops() to find backedges and propagate containing loops. +/// +/// This is a constant-time operation if there is no irreducible control flow. +/// If there is, then it's linear in the number of loops. +/// +/// \pre If Pred is less than Current (i.e., this is a forward edge), then \a +/// registerPredecessor() has already been called on all of Pred's +/// predecessors. +static void registerPredecessor(BlockFrequencyInfoImplBase &BFI, + LoopStack &Loops, + const BlockNode &Current, + const BlockNode &Pred) { + // Ignore unreachable predecessors. + if (!Pred.isValid()) + return; + + // Deal with backedges. + auto &Working = BFI.Working[Current.Index]; + if (Pred >= Current) { + // Save the backedge if it's the latest one seen so far. + updateNodeToMax(Working.LatestBackedge, Pred); + return; + } + + // Check for top-level nodes. + if (Loops.empty()) + return; + + // Normally, Pred is the head of or inside of the loop on the top of the + // stack. Then Current is too. + if (Pred == Loops.top().LoopHead) { + updateNodeToMax(Working.ContainingLoop, Pred); + return; + } + BlockNode PredLoop = BFI.Working[Pred.Index].ContainingLoop; + if (PredLoop == Loops.top().LoopHead) { + updateNodeToMax(Working.ContainingLoop, PredLoop); + return; + } + + // Visit all of Pred's containing loops to see if Current is also a member. + // If there's no irreducible control flow, then Pred must be from a + // just-popped loop, and this loop will stop in the first iteration. + for (; PredLoop.isValid(); + PredLoop = BFI.Working[PredLoop.Index].ContainingLoop) + if (BFI.Working[PredLoop.Index].LatestBackedge >= Current) { + updateNodeToMax(Working.ContainingLoop, PredLoop); + return; + } +} + +template +void BlockFrequencyInfoImpl::initializeLoops() { + DEBUG(dbgs() << "loop-detection\n"); + LoopStack Loops; + for (rpot_iterator I = rpot_begin(), E = rpot_end(); I != E; ++I) { + const BlockT *BB = *I; + BlockNode Current = getNode(I); + for (auto PI = Predecessor::child_begin(BB), + PE = Predecessor::child_end(BB); PI != PE; ++PI) + registerPredecessor(*this, Loops, Current, getNode(*PI)); + Loops.adjustAfterFinishing(Current, Working[Current.Index].LatestBackedge); + } + initializeLoopPackages(); +} + +template +void BlockFrequencyInfoImpl::computeMassInLoops() { + for (reverse_iterator RI = rpot_rbegin(), RE = rpot_rend(); RI != RE; ++RI) { + BlockNode Head = getNode(RI); + if (!Working[Head.Index].isLoopHeader()) + continue; + computeMassInLoop(Head); + } +} + +template +void BlockFrequencyInfoImpl:: +computeMassInLoop(const BlockNode &LoopHead) { + // Compute mass in loop. + DEBUG(dbgs() << "compute-mass-in-loop: " << getBlockName(LoopHead) << "\n"); + + Working[LoopHead.Index].Mass = BlockMass::getFull(); + propagateMassToSuccessors(LoopHead, LoopHead); + + for (const BlockNode &M : getLoopPackage(LoopHead).Members) + propagateMassToSuccessors(LoopHead, M); + + computeLoopScale(*this, LoopHead); + packageLoop(*this, LoopHead); +} + +template +void BlockFrequencyInfoImpl::computeMassInFunction() { + // Compute mass in function. + DEBUG(dbgs() << "compute-mass-in-function\n"); + Working[0].Mass = BlockMass::getFull(); + for (rpot_iterator I = rpot_begin(), IE = rpot_end(); I != IE; ++I) { + // Check for nodes that have been packaged. + BlockNode Node = getNode(*I); + if (Working[Node.Index].hasLoopHeader()) + continue; + + propagateMassToSuccessors(BlockNode(), Node); + } +} + +/// In a BasicBlock graph, it's linear to fetch the edge weight given a pointer +/// to the destination block. 
Instead, pass in the successor index, which is a +/// constant-time lookup. +template +static uint32_t getEdgeWeight(const BranchProbabilityInfo *BPI, + const BasicBlock *Src, const Iterator &I) { + return BPI->getEdgeWeight(Src, I.getSuccessorIndex()); +} +template +static uint32_t getEdgeWeight(const MachineBranchProbabilityInfo *BPI, + const MachineBasicBlock *Src, const Iterator &I) { + return BPI->getEdgeWeight(Src, *I); +} +template +void BlockFrequencyInfoImpl:: +propagateMassToSuccessors(const BlockNode &LoopHead, const BlockNode &Node) { + DEBUG(dbgs() << " - node: " << getBlockName(Node) << "\n"); + // Calculate probability for successors. + Distribution Dist; + if (Node != LoopHead && Working[Node.Index].isLoopHeader()) + addLoopSuccessorsToDist(LoopHead, Node, Dist); + else { + const BlockT *BB = getBlock(Node); + for (auto SI = Successor::child_begin(BB), SE = Successor::child_end(BB); + SI != SE; ++SI) + addToDist(Dist, LoopHead, getNode(*SI), getEdgeWeight(BPI, BB, SI)); + } + + // Distribute mass to successors, saving exit and backedge data in the + // loop header. + distributeMass(Node, LoopHead, Dist); +} + +template +raw_ostream &BlockFrequencyInfoImpl::print(raw_ostream &OS) const { + if (!F) + return OS; + OS << "block-frequency-info: " << F->getName() << "\n"; + for (const BlockT &BB : *F) + OS << " - " << getBlockName(&BB) + << ": float = " << getFloatingBlockFreq(&BB) + << ", int = " << getBlockFreq(&BB).getFrequency() + << "\n"; + + // Add an extra newline for readability. + OS << "\n"; + return OS; +} + +// Explicit instantiations must come at the end of the file. +namespace llvm { +template class BlockFrequencyInfoImpl +; +template class BlockFrequencyInfoImpl +; +} Index: lib/Analysis/CMakeLists.txt =================================================================== --- lib/Analysis/CMakeLists.txt +++ lib/Analysis/CMakeLists.txt @@ -7,6 +7,7 @@ Analysis.cpp BasicAliasAnalysis.cpp BlockFrequencyInfo.cpp + BlockFrequencyInfoImpl.cpp BranchProbabilityInfo.cpp CFG.cpp CFGPrinter.cpp Index: lib/CodeGen/MachineBlockFrequencyInfo.cpp =================================================================== --- lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -1,4 +1,4 @@ -//====------ MachineBlockFrequencyInfo.cpp - MBB Frequency Analysis ------====// +//===- MachineBlockFrequencyInfo.cpp - MBB Frequency Analysis -------------===// // // The LLVM Compiler Infrastructure // @@ -11,9 +11,11 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "block-freq" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" -#include "llvm/Analysis/BlockFrequencyImpl.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/Passes.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" @@ -168,7 +170,7 @@ } const MachineFunction *MachineBlockFrequencyInfo::getFunction() const { - return MBFI ? MBFI->Fn : nullptr; + return MBFI ? MBFI->getFunction() : nullptr; } raw_ostream & Index: lib/Support/BlockMass.cpp =================================================================== --- /dev/null +++ lib/Support/BlockMass.cpp @@ -0,0 +1,67 @@ +//===- BlockMass.cpp - Block mass -----------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements BlockMass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/BlockMass.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/PositiveFloat.h"
+
+using namespace llvm;
+
+BlockMass &BlockMass::operator*=(const BranchProbability &P) {
+  uint32_t N = P.getNumerator(), D = P.getDenominator();
+  assert(D && "divide by 0");
+  assert(N <= D && "fraction greater than 1");
+
+  // Fast path for multiplying by 1.0.
+  if (!Mass || N == D)
+    return *this;
+
+  // Get as much precision as we can.
+  int Shift = countLeadingZeros(Mass);
+  uint64_t ShiftedQuotient = (Mass << Shift) / D;
+  uint64_t Product = ShiftedQuotient * N >> Shift;
+
+  // Now check for what's lost.
+  uint64_t Left = ShiftedQuotient * (D - N) >> Shift;
+  uint64_t Lost = Mass - Product - Left;
+
+  // TODO: prove this assertion.
+  assert(Lost <= UINT32_MAX);
+
+  // Take the product plus a portion of the spoils.
+  Mass = Product + Lost * N / D;
+  return *this;
+}
+
+PositiveFloat<uint64_t> BlockMass::toFloat() const {
+  if (isFull())
+    return PositiveFloat<uint64_t>(1, 0);
+  return PositiveFloat<uint64_t>(getMass() + 1, -64);
+}
+
+void BlockMass::dump() const {
+  print(dbgs());
+}
+
+static char getHexDigit(int N) {
+  assert(N < 16);
+  if (N < 10)
+    return '0' + N;
+  return 'a' + N - 10;
+}
+raw_ostream &BlockMass::print(raw_ostream &OS) const {
+  for (int Digits = 0; Digits < 16; ++Digits)
+    OS << getHexDigit(Mass >> (60 - Digits*4) & 0xf);
+  return OS;
+}
Index: lib/Support/CMakeLists.txt
===================================================================
--- lib/Support/CMakeLists.txt
+++ lib/Support/CMakeLists.txt
@@ -5,6 +5,7 @@
   ARMBuildAttrs.cpp
   Allocator.cpp
   BlockFrequency.cpp
+  BlockMass.cpp
   BranchProbability.cpp
   circular_raw_ostream.cpp
   CommandLine.cpp
@@ -39,6 +40,7 @@
   MemoryObject.cpp
   MD5.cpp
   PluginLoader.cpp
+  PositiveFloat.cpp
   PrettyStackTrace.cpp
   Regex.cpp
   SmallPtrSet.cpp
Index: lib/Support/PositiveFloat.cpp
===================================================================
--- /dev/null
+++ lib/Support/PositiveFloat.cpp
@@ -0,0 +1,299 @@
+//===- PositiveFloat.cpp - Simple positive floating point -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License.  See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements positive floating point numbers.
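+// A number is represented as unsigned digits scaled by a signed base-2
+// exponent (dump() prints it as D*2^E).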
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/APFloat.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/PositiveFloat.h" +#include "llvm/Support/raw_ostream.h" + +#include + +using namespace llvm; + +const int PositiveFloatBase::MaxExponent; +const int PositiveFloatBase::MinExponent; + +static void appendDigit(std::string &Str, unsigned D) { + assert(D < 10); + Str += '0' + D % 10; +} + +static void appendNumber(std::string &Str, uint64_t N) { + while (N) { + appendDigit(Str, N % 10); + N /= 10; + } +} + +static bool doesRoundUp(char Digit) { + switch (Digit) { + case '5': case '6': case '7': case '8': case '9': + return true; + default: + return false; + } +} + +static std::string toStringAPFloat(uint64_t D, int E, unsigned Precision) { + assert(E >= PositiveFloatBase::MinExponent); + assert(E <= PositiveFloatBase::MaxExponent); + + // Find a new E, but don't let it increase past MaxExponent. + int LeadingZeros = PositiveFloatBase::countLeadingZeros64(D); + int NewE = std::min(PositiveFloatBase::MaxExponent, + E + 63 - LeadingZeros); + int Shift = 63 - (NewE - E); + assert(Shift <= LeadingZeros); + assert(Shift == LeadingZeros || NewE == PositiveFloatBase::MaxExponent); + D <<= Shift; + E = NewE; + + // Check for a denormal. + unsigned AdjustedE = E + 16383; + if (!(D >> 63)) { + assert(E == PositiveFloatBase::MaxExponent); + AdjustedE = 0; + } + + // Build the float and print it. + uint64_t RawBits[2] = { D, AdjustedE }; + APFloat Float(APFloat::x87DoubleExtended, APInt(80, RawBits)); + SmallVector Chars; + Float.toString(Chars, Precision, 0); + return std::string(Chars.begin(), Chars.end()); +} + +static std::string stripTrailingZeros(std::string Float) { + size_t NonZero = Float.find_last_not_of('0'); + assert(NonZero != std::string::npos && "no . in floating point string"); + + if (Float[NonZero] == '.') + ++NonZero; + + return Float.substr(0, NonZero + 1); +} + +std::string PositiveFloatBase::toString(uint64_t D, int16_t E, int Width, + unsigned Precision) { + if (!D) + return "0.0"; + + // Canonicalize exponent and digits. + uint64_t Above0 = 0; + uint64_t Below0 = 0; + uint64_t Extra = 0; + int ExtraShift = 0; + if (E == 0) { + Above0 = D; + } else if (E > 0) { + if (int Shift = std::min(int16_t(countLeadingZeros64(D)), E)) { + D <<= Shift; + E -= Shift; + + if (!E) + Above0 = D; + } + } else if (E > -64) { + Above0 = D >> -E; + Below0 = D << (64 + E); + } else if (E > -120) { + Below0 = D >> (-E - 64); + Extra = D << (128 + E); + ExtraShift = -64 - E; + } + + // Fall back on APFloat for very small and very large numbers. + if (!Above0 && !Below0) + return toStringAPFloat(D, E, Precision); + + // Append the digits before the decimal. + std::string Str; + size_t DigitsOut = 0; + if (Above0) { + appendNumber(Str, Above0); + DigitsOut = Str.size(); + } else + appendDigit(Str, 0); + std::reverse(Str.begin(), Str.end()); + + // Return early if there's nothing after the decimal. + if (!Below0) + return Str + ".0"; + + // Append the decimal and beyond. + Str += '.'; + uint64_t Error = UINT64_C(1) << (64 - Width); + + // We need to shift Below0 to the right to make space for calculating + // digits. Save the precision we're losing in Extra. 
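+  // After this shift, Below0 has four leading zero bits, so multiplying it by
+  // 10 cannot overflow; each round, the next decimal digit surfaces in the
+  // top nibble (bits 60-63).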
+  Extra = (Below0 & 0xf) << 56 | (Extra >> 8);
+  Below0 >>= 4;
+  size_t SinceDot = 0;
+  size_t AfterDot = Str.size();
+  do {
+    if (ExtraShift) {
+      --ExtraShift;
+      Error *= 5;
+    } else
+      Error *= 10;
+
+    Below0 *= 10;
+    Extra *= 10;
+    Below0 += (Extra >> 60);
+    Extra = Extra & (UINT64_MAX >> 4);
+    appendDigit(Str, Below0 >> 60);
+    Below0 = Below0 & (UINT64_MAX >> 4);
+    if (DigitsOut || Str.back() != '0')
+      ++DigitsOut;
+    ++SinceDot;
+  } while (Error && (Below0 << 4 | Extra >> 60) >= Error/2 &&
+           (!Precision || DigitsOut <= Precision || SinceDot < 2));
+
+  // Return early for maximum precision.
+  if (!Precision || DigitsOut <= Precision)
+    return stripTrailingZeros(Str);
+
+  // Find where to truncate.
+  size_t Truncate = std::max(Str.size() - (DigitsOut - Precision),
+                             AfterDot + 1);
+
+  // Check if there's anything to truncate.
+  if (Truncate >= Str.size())
+    return stripTrailingZeros(Str);
+
+  bool Carry = doesRoundUp(Str[Truncate]);
+  if (!Carry)
+    return stripTrailingZeros(Str.substr(0, Truncate));
+
+  // Round with the first truncated digit.
+  for (std::string::reverse_iterator
+       I(Str.begin() + Truncate), E = Str.rend(); I != E; ++I) {
+    if (*I == '.')
+      continue;
+    if (*I == '9') {
+      *I = '0';
+      continue;
+    }
+
+    ++*I;
+    Carry = false;
+    break;
+  }
+
+  // Add "1" in front if we still need to carry.
+  return stripTrailingZeros(std::string(Carry, '1') + Str.substr(0, Truncate));
+}
+
+raw_ostream &
+PositiveFloatBase::print(raw_ostream &OS, uint64_t D, int16_t E, int Width,
+                         unsigned Precision) {
+  return OS << toString(D, E, Width, Precision);
+}
+
+void PositiveFloatBase::dump(uint64_t D, int16_t E, int Width) {
+  print(dbgs(), D, E, Width, 0)
+      << "[" << Width << ":" << D << "*2^" << E << "]";
+}
+
+static std::pair<uint64_t, int16_t>
+getRoundedFloat(uint64_t N, bool ShouldRound, int64_t Shift) {
+  if (ShouldRound)
+    if (!++N)
+      // Rounding caused an overflow.
+      return std::make_pair(UINT64_C(1), Shift + 64);
+  return std::make_pair(N, Shift);
+}
+
+std::pair<uint64_t, int16_t>
+PositiveFloatBase::divide64(uint64_t Dividend, uint64_t Divisor) {
+  // Input should be sanitized.
+  assert(Divisor);
+  assert(Dividend);
+
+  // Minimize size of divisor.
+  int16_t Shift = 0;
+  if (int Zeros = countTrailingZeros(Divisor)) {
+    Shift -= Zeros;
+    Divisor >>= Zeros;
+  }
+
+  // Check for powers of two.
+  if (Divisor == 1)
+    return std::make_pair(Dividend, Shift);
+
+  // Maximize size of dividend.
+  if (int Zeros = countLeadingZeros64(Dividend)) {
+    Shift -= Zeros;
+    Dividend <<= Zeros;
+  }
+
+  // Start with the result of a divide.
+  uint64_t Quotient = Dividend / Divisor;
+  Dividend %= Divisor;
+
+  // Continue building the quotient with long division.
+  //
+  // TODO: continue with larger digits.
+  while (!(Quotient >> 63) && Dividend) {
+    // Shift Dividend, and check for overflow.
+    bool IsOverflow = Dividend >> 63;
+    Dividend <<= 1;
+    --Shift;
+
+    // Divide.
+    bool DoesDivide = IsOverflow || Divisor <= Dividend;
+    Quotient = (Quotient << 1) | DoesDivide;
+    Dividend -= DoesDivide ? Divisor : 0;
+  }
+
+  // Round.  getRoundedFloat renormalizes the result if rounding overflows
+  // the digits, so rounding must only be applied here, in one place.
+  return getRoundedFloat(Quotient, Dividend >= getHalf(Divisor), Shift);
+}
+
+static void addWithCarry(uint64_t &Upper, uint64_t &Lower, uint64_t N) {
+  uint64_t NewLower = Lower + (N << 32);
+  Upper += (N >> 32) + (NewLower < Lower);
+  Lower = NewLower;
+}
+
+std::pair<uint64_t, int16_t>
+PositiveFloatBase::multiply64(uint64_t L, uint64_t R) {
+  // Separate into two 32-bit digits (U.L).
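+  //
+  // This is schoolbook long multiplication in base 2^32:
+  //   L * R == UL*UR*2^64 + (UL*LR + LL*UR)*2^32 + LL*LR,
+  // where each of the four partial products fits in 64 bits.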
+  uint64_t UL = L >> 32, LL = L & UINT32_MAX,
+           UR = R >> 32, LR = R & UINT32_MAX;
+
+  // Compute cross products.
+  uint64_t P1 = UL*UR, P2 = UL*LR, P3 = LL*UR, P4 = LL*LR;
+
+  // Sum into two 64-bit digits.
+  uint64_t Upper = P1, Lower = P4;
+  addWithCarry(Upper, Lower, P2);
+  addWithCarry(Upper, Lower, P3);
+
+  // Check for the lower 32 bits.
+  if (!Upper)
+    return std::make_pair(Lower, 0);
+
+  // Shift as little as possible to maximize precision.
+  unsigned LeadingZeros = countLeadingZeros64(Upper);
+  int16_t Shift = 64 - LeadingZeros;
+  if (LeadingZeros)
+    Upper = Upper << LeadingZeros | Lower >> Shift;
+  bool ShouldRound = Shift && (Lower & UINT64_C(1) << (Shift - 1));
+  return getRoundedFloat(Upper, ShouldRound, Shift);
+}
Index: test/Analysis/BlockFrequencyInfo/bad_input.ll
===================================================================
--- /dev/null
+++ test/Analysis/BlockFrequencyInfo/bad_input.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -analyze -block-freq | FileCheck %s
+
+declare void @g(i32 %x)
+
+; CHECK-LABEL: Printing analysis {{.*}} for function 'branch_weight_0':
+; CHECK-NEXT: block-frequency-info: branch_weight_0
+define void @branch_weight_0(i32 %a) {
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+entry:
+  br label %for.body
+
+; Check that we get 1,4 instead of 0,3.
+; CHECK-NEXT: for.body: float = 4.0,
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  call void @g(i32 %i)
+  %inc = add i32 %i, 1
+  %cmp = icmp ugt i32 %inc, %a
+  br i1 %cmp, label %for.end, label %for.body, !prof !0
+
+; CHECK-NEXT: for.end: float = 1.0, int = [[ENTRY]]
+for.end:
+  ret void
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 0, i32 3}
+
+; CHECK-LABEL: Printing analysis {{.*}} for function 'infinite_loop'
+; CHECK-NEXT: block-frequency-info: infinite_loop
+define void @infinite_loop(i1 %x) {
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
+entry:
+  br i1 %x, label %for.body, label %for.end, !prof !1
+
+; Check that the loop scale maxes out at 4096, giving 2048 here.
+; CHECK-NEXT: for.body: float = 2048.0,
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  call void @g(i32 %i)
+  %inc = add i32 %i, 1
+  br label %for.body
+
+; Check that the exit weight is half of entry, since half is lost in the
+; infinite loop above.
+; CHECK-NEXT: for.end: float = 0.5,
+for.end:
+  ret void
+}
+
+!1 = metadata !{metadata !"branch_weights", i32 1, i32 1}
Index: test/Analysis/BlockFrequencyInfo/basic.ll
===================================================================
--- test/Analysis/BlockFrequencyInfo/basic.ll
+++ test/Analysis/BlockFrequencyInfo/basic.ll
@@ -1,13 +1,14 @@
 ; RUN: opt < %s -analyze -block-freq | FileCheck %s
 
 define i32 @test1(i32 %i, i32* %a) {
-; CHECK: Printing analysis {{.*}} for function 'test1'
-; CHECK: entry = 1.0
+; CHECK-LABEL: Printing analysis {{.*}} for function 'test1':
+; CHECK-NEXT: block-frequency-info: test1
+; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]]
 entry:
   br label %body
 
 ; Loop backedges are weighted and thus their bodies have a greater frequency.
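+; With no metadata, the backedge gets the default loop weight, which here
+; corresponds to an expected trip count (and body frequency) of 32.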
-; CHECK: body = 32.0 +; CHECK-NEXT: body: float = 32.0, body: %iv = phi i32 [ 0, %entry ], [ %next, %body ] %base = phi i32 [ 0, %entry ], [ %sum, %body ] @@ -18,29 +19,29 @@ %exitcond = icmp eq i32 %next, %i br i1 %exitcond, label %exit, label %body -; CHECK: exit = 1.0 +; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]] exit: ret i32 %sum } define i32 @test2(i32 %i, i32 %a, i32 %b) { -; CHECK: Printing analysis {{.*}} for function 'test2' -; CHECK: entry = 1.0 +; CHECK-LABEL: Printing analysis {{.*}} for function 'test2': +; CHECK-NEXT: block-frequency-info: test2 +; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]] entry: %cond = icmp ult i32 %i, 42 br i1 %cond, label %then, label %else, !prof !0 ; The 'then' branch is predicted more likely via branch weight metadata. -; CHECK: then = 0.94116 +; CHECK-NEXT: then: float = 0.9411{{[0-9]*}}, then: br label %exit -; CHECK: else = 0.05877 +; CHECK-NEXT: else: float = 0.05882{{[0-9]*}}, else: br label %exit -; FIXME: It may be a bug that we don't sum back to 1.0. -; CHECK: exit = 0.99993 +; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]] exit: %result = phi i32 [ %a, %then ], [ %b, %else ] ret i32 %result @@ -49,37 +50,37 @@ !0 = metadata !{metadata !"branch_weights", i32 64, i32 4} define i32 @test3(i32 %i, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) { -; CHECK: Printing analysis {{.*}} for function 'test3' -; CHECK: entry = 1.0 +; CHECK-LABEL: Printing analysis {{.*}} for function 'test3': +; CHECK-NEXT: block-frequency-info: test3 +; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]] entry: switch i32 %i, label %case_a [ i32 1, label %case_b i32 2, label %case_c i32 3, label %case_d i32 4, label %case_e ], !prof !1 -; CHECK: case_a = 0.04998 +; CHECK-NEXT: case_a: float = 0.05, case_a: br label %exit -; CHECK: case_b = 0.04998 +; CHECK-NEXT: case_b: float = 0.05, case_b: br label %exit ; The 'case_c' branch is predicted more likely via branch weight metadata. -; CHECK: case_c = 0.79998 +; CHECK-NEXT: case_c: float = 0.8, case_c: br label %exit -; CHECK: case_d = 0.04998 +; CHECK-NEXT: case_d: float = 0.05, case_d: br label %exit -; CHECK: case_e = 0.04998 +; CHECK-NEXT: case_e: float = 0.05, case_e: br label %exit -; FIXME: It may be a bug that we don't sum back to 1.0. -; CHECK: exit = 0.99993 +; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]] exit: %result = phi i32 [ %a, %case_a ], [ %b, %case_b ], @@ -91,44 +92,50 @@ !1 = metadata !{metadata !"branch_weights", i32 4, i32 4, i32 64, i32 4, i32 4} -; CHECK: Printing analysis {{.*}} for function 'nested_loops' -; CHECK: entry = 1.0 -; This test doesn't seem to be assigning sensible frequencies to nested loops. 
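+; The exit weight of 1 against a backedge weight of 4000 gives each loop an
+; expected trip count of 4001, so each nesting level multiplies the frequency
+; by 4001: 4001, 4001^2 = 16008001, and 4001^3 = 64048012001.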
define void @nested_loops(i32 %a) { +; CHECK-LABEL: Printing analysis {{.*}} for function 'nested_loops': +; CHECK-NEXT: block-frequency-info: nested_loops +; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]] entry: br label %for.cond1.preheader +; CHECK-NEXT: for.cond1.preheader: float = 4001.0, for.cond1.preheader: %x.024 = phi i32 [ 0, %entry ], [ %inc12, %for.inc11 ] br label %for.cond4.preheader +; CHECK-NEXT: for.cond4.preheader: float = 16008001.0, for.cond4.preheader: %y.023 = phi i32 [ 0, %for.cond1.preheader ], [ %inc9, %for.inc8 ] %add = add i32 %y.023, %x.024 br label %for.body6 +; CHECK-NEXT: for.body6: float = 64048012001.0, for.body6: %z.022 = phi i32 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ] %add7 = add i32 %add, %z.022 - tail call void @g(i32 %add7) #2 + tail call void @g(i32 %add7) %inc = add i32 %z.022, 1 %cmp5 = icmp ugt i32 %inc, %a br i1 %cmp5, label %for.inc8, label %for.body6, !prof !2 +; CHECK-NEXT: for.inc8: float = 16008001.0, for.inc8: %inc9 = add i32 %y.023, 1 %cmp2 = icmp ugt i32 %inc9, %a br i1 %cmp2, label %for.inc11, label %for.cond4.preheader, !prof !2 +; CHECK-NEXT: for.inc11: float = 4001.0, for.inc11: %inc12 = add i32 %x.024, 1 %cmp = icmp ugt i32 %inc12, %a br i1 %cmp, label %for.end13, label %for.cond1.preheader, !prof !2 +; CHECK-NEXT: for.end13: float = 1.0, int = [[ENTRY]] for.end13: ret void } -declare void @g(i32) #1 +declare void @g(i32) !2 = metadata !{metadata !"branch_weights", i32 1, i32 4000} Index: test/Analysis/BlockFrequencyInfo/irreducible.ll =================================================================== --- /dev/null +++ test/Analysis/BlockFrequencyInfo/irreducible.ll @@ -0,0 +1,107 @@ +; RUN: opt < %s -analyze -block-freq | FileCheck %s + +declare void @f(i32 %x) +declare void @g(i32 %x) +declare void @h(i32 %x) +declare void @i(i32 %x) + +; CHECK-LABEL: Printing analysis {{.*}} for function 'multiexit': +; CHECK-NEXT: block-frequency-info: multiexit +define void @multiexit(i32 %a) { +; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]] +entry: + br label %loop.1 + +; CHECK-NEXT: loop.1: float = 1.333{{3*}}, +loop.1: + %i = phi i32 [ 0, %entry ], [ %inc.2, %loop.2 ] + call void @f(i32 %i) + %inc.1 = add i32 %i, 1 + %cmp.1 = icmp ugt i32 %inc.1, %a + br i1 %cmp.1, label %exit.1, label %loop.2, !prof !0 + +; CHECK-NEXT: loop.2: float = 0.666{{6*7}}, +loop.2: + call void @g(i32 %inc.1) + %inc.2 = add i32 %inc.1, 1 + %cmp.2 = icmp ugt i32 %inc.2, %a + br i1 %cmp.2, label %exit.2, label %loop.1, !prof !1 + +; CHECK-NEXT: exit.1: float = 0.666{{6*7}}, +exit.1: + call void @h(i32 %inc.1) + br label %return + +; CHECK-NEXT: exit.2: float = 0.333{{3*}}, +exit.2: + call void @i(i32 %inc.2) + br label %return + +; CHECK-NEXT: return: float = 1.0, int = [[ENTRY]] +return: + ret void +} + +!0 = metadata !{metadata !"branch_weights", i32 3, i32 3} +!1 = metadata !{metadata !"branch_weights", i32 5, i32 5} + +; The current BlockFrequencyInfo algorithm doesn't handle multiple entrances +; into a loop very well. The frequencies assigned to blocks in the loop are +; predictable (and not absurd), but also not correct and therefore not worth +; testing. +; +; In particular, in this case c1 and c2 should be treated as equals in a +; single loop. The exit frequency is 1/3, so the scaling factor for the loop +; should be 3.0. The loop is entered 2/3 of the time, and c1 and c2 split the +; total loop mass evenly (1/2), so they should each have frequencies of 1.0 +; (3.0*2/3*1/2). 
Another way of computing this result is by assigning 1.0 +; to entry and showing that c1 and c2 should accumulate masses of: +; +; 1/3 + 2/9 + 4/27 + 8/81 + ... +; 2^0/3^1 + 2^1/3^2 + 2^2/3^3 + 2^3/3^4 + ... +; +; At the first step, c1 and c2 each get 1/3 of the entry. At each subsequent +; step, c1 and c2 each get 1/3 of what's left in c1 and c2 combined. This +; infinite series sums to 1. +; +; However, the current algorithm returns 1.6 and 1.2 respectively (assuming c1 +; precedes c2 in the reverse post-order traversal). Treating them as +; independent loop headers inflates the scaling factor and breaks the +; symmetry. +; +; CHECK-LABEL: Printing analysis {{.*}} for function 'crossloops': +; CHECK-NEXT: block-frequency-info: crossloops +define void @crossloops(i32 %a) { +; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]] +entry: + %choose = call i32 @choose(i32 %a) + switch i32 %choose, label %exit [ i32 1, label %c1 + i32 2, label %c2 ], !prof !2 + +; CHECK-NEXT: c1: +c1: + %i1 = phi i32 [ %a, %entry ], [ %i1.inc, %c1 ], [ %i2.inc, %c2 ] + %i1.inc = add i32 %i1, 1 + %choose1 = call i32 @choose(i32 %i1) + switch i32 %choose1, label %exit [ i32 1, label %c1 + i32 2, label %c2 ], !prof !2 + +; CHECK-NEXT: c2: +c2: + %i2 = phi i32 [ %a, %entry ], [ %i1.inc, %c1 ], [ %i2.inc, %c2 ] + %i2.inc = add i32 %i2, 1 + %choose2 = call i32 @choose(i32 %i2) + switch i32 %choose2, label %exit [ i32 1, label %c1 + i32 2, label %c2 ], !prof !2 + +; We still shouldn't lose any mass. +; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]] +exit: + ret void +} + +declare i32 @choose(i32) +declare void @f1(i32) +declare void @f2(i32) + +!2 = metadata !{metadata !"branch_weights", i32 2, i32 2, i32 2} Index: test/Analysis/BlockFrequencyInfo/loop_with_branch.ll =================================================================== --- /dev/null +++ test/Analysis/BlockFrequencyInfo/loop_with_branch.ll @@ -0,0 +1,44 @@ +; RUN: opt < %s -analyze -block-freq | FileCheck %s + +; CHECK-LABEL: Printing analysis {{.*}} for function 'loop_with_branch': +; CHECK-NEXT: block-frequency-info: loop_with_branch +define void @loop_with_branch(i32 %a) { +; CHECK-NEXT: entry: float = 1.0, int = [[ENTRY:[0-9]+]] +entry: + %skip_loop = call i1 @foo0(i32 %a) + br i1 %skip_loop, label %skip, label %header, !prof !0 + +; CHECK-NEXT: skip: float = 0.25, +skip: + br label %exit + +; CHECK-NEXT: header: float = 4.5, +header: + %i = phi i32 [ 0, %entry ], [ %i.next, %back ] + %i.next = add i32 %i, 1 + %choose = call i2 @foo1(i32 %i) + switch i2 %choose, label %exit [ i2 0, label %left + i2 1, label %right ], !prof !1 + +; CHECK-NEXT: left: float = 1.5, +left: + br label %back + +; CHECK-NEXT: right: float = 2.25, +right: + br label %back + +; CHECK-NEXT: back: float = 3.75, +back: + br label %header + +; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]] +exit: + ret void +} + +declare i1 @foo0(i32) +declare i2 @foo1(i32) + +!0 = metadata !{metadata !"branch_weights", i32 1, i32 3} +!1 = metadata !{metadata !"branch_weights", i32 1, i32 2, i32 3} Index: test/Analysis/BlockFrequencyInfo/nested_loop_with_branches.ll =================================================================== --- /dev/null +++ test/Analysis/BlockFrequencyInfo/nested_loop_with_branches.ll @@ -0,0 +1,59 @@ +; RUN: opt < %s -analyze -block-freq | FileCheck %s + +; CHECK-LABEL: Printing analysis {{.*}} for function 'nested_loop_with_branches' +; CHECK-NEXT: block-frequency-info: nested_loop_with_branches +define void @nested_loop_with_branches(i32 %a) { +; CHECK-NEXT: entry: 
float = 1.0, int = [[ENTRY:[0-9]+]] +entry: + %v0 = call i1 @foo0(i32 %a) + br i1 %v0, label %exit, label %outer, !prof !0 + +; CHECK-NEXT: outer: float = 12.0, +outer: + %i = phi i32 [ 0, %entry ], [ %i.next, %inner.end ], [ %i.next, %no_inner ] + %i.next = add i32 %i, 1 + %do_inner = call i1 @foo1(i32 %i) + br i1 %do_inner, label %no_inner, label %inner, !prof !0 + +; CHECK-NEXT: inner: float = 36.0, +inner: + %j = phi i32 [ 0, %outer ], [ %j.next, %inner.end ] + %side = call i1 @foo3(i32 %j) + br i1 %side, label %left, label %right, !prof !0 + +; CHECK-NEXT: left: float = 9.0, +left: + %v4 = call i1 @foo4(i32 %j) + br label %inner.end + +; CHECK-NEXT: right: float = 27.0, +right: + %v5 = call i1 @foo5(i32 %j) + br label %inner.end + +; CHECK-NEXT: inner.end: float = 36.0, +inner.end: + %stay_inner = phi i1 [ %v4, %left ], [ %v5, %right ] + %j.next = add i32 %j, 1 + br i1 %stay_inner, label %inner, label %outer, !prof !1 + +; CHECK-NEXT: no_inner: float = 3.0, +no_inner: + %continue = call i1 @foo6(i32 %i) + br i1 %continue, label %outer, label %exit, !prof !1 + +; CHECK-NEXT: exit: float = 1.0, int = [[ENTRY]] +exit: + ret void +} + +declare i1 @foo0(i32) +declare i1 @foo1(i32) +declare i1 @foo2(i32) +declare i1 @foo3(i32) +declare i1 @foo4(i32) +declare i1 @foo5(i32) +declare i1 @foo6(i32) + +!0 = metadata !{metadata !"branch_weights", i32 1, i32 3} +!1 = metadata !{metadata !"branch_weights", i32 3, i32 1} Index: test/CodeGen/XCore/llvm-intrinsics.ll =================================================================== --- test/CodeGen/XCore/llvm-intrinsics.ll +++ test/CodeGen/XCore/llvm-intrinsics.ll @@ -287,9 +287,8 @@ ; CHECKFP: .LBB{{[0-9_]+}} ; CHECKFP-NEXT: ldc r2, 40 ; CHECKFP-NEXT: add r2, r10, r2 -; CHECKFP-NEXT: add r0, r2, r0 +; CHECKFP-NEXT: add r2, r2, r0 ; CHECKFP-NEXT: mov r3, r1 -; CHECKFP-NEXT: mov r2, r0 ; CHECKFP-NEXT: ldw r9, r10[4] ; CHECKFP-NEXT: ldw r8, r10[5] ; CHECKFP-NEXT: ldw r7, r10[6] @@ -337,9 +336,8 @@ ; CHECK-NEXT: ldc r2, 36 ; CHECK-NEXT: ldaw r3, sp[0] ; CHECK-NEXT: add r2, r3, r2 -; CHECK-NEXT: add r0, r2, r0 +; CHECK-NEXT: add r2, r2, r0 ; CHECK-NEXT: mov r3, r1 -; CHECK-NEXT: mov r2, r0 ; CHECK-NEXT: ldw r10, sp[2] ; CHECK-NEXT: ldw r9, sp[3] ; CHECK-NEXT: ldw r8, sp[4] Index: unittests/Support/BlockMassTest.cpp =================================================================== --- /dev/null +++ unittests/Support/BlockMassTest.cpp @@ -0,0 +1,160 @@ +//===- llvm/unittest/Support/BlockMassTest.cpp - BlockMass tests ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/BlockMass.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/PositiveFloat.h"
+#include "llvm/Support/raw_ostream.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace llvm {
+void PrintTo(const BlockMass &M, ::std::ostream *os) {
+  std::string String;
+  {
+    raw_string_ostream Raw(String);
+    Raw << M << "|" << M.getMass();
+  }
+  *os << String;
+}
+template <class DigitsT>
+void PrintTo(const PositiveFloat<DigitsT> &F, ::std::ostream *os) {
+  *os << F.getDigits() << "*2^" << F.getExponent();
+}
+}
+namespace {
+
+typedef BlockMass BM;
+TEST(BlockMassTest, Accessors) {
+  EXPECT_EQ(UINT64_C(0), BM::getEmpty().getMass());
+  EXPECT_TRUE(!BM::getEmpty());
+  EXPECT_TRUE(BM::getEmpty().isEmpty());
+  EXPECT_FALSE(BM::getEmpty().isFull());
+
+  EXPECT_EQ(UINT64_MAX, BM::getFull().getMass());
+  EXPECT_FALSE(!BM::getFull());
+  EXPECT_FALSE(BM::getFull().isEmpty());
+  EXPECT_TRUE(BM::getFull().isFull());
+
+  EXPECT_EQ(UINT64_C(789), BM(789).getMass());
+  EXPECT_FALSE(!BM(789));
+  EXPECT_FALSE(BM(789).isEmpty());
+  EXPECT_FALSE(BM(789).isFull());
+}
+
+TEST(BlockMassTest, Comparison) {
+  EXPECT_TRUE(BM(75) <= BM(75));
+  EXPECT_TRUE(BM(75) >= BM(75));
+  EXPECT_TRUE(BM(75) == BM(75));
+  EXPECT_FALSE(BM(75) < BM(75));
+  EXPECT_FALSE(BM(75) > BM(75));
+  EXPECT_FALSE(BM(75) != BM(75));
+
+  EXPECT_TRUE(BM(74) < BM(75));
+  EXPECT_TRUE(BM(74) <= BM(75));
+  EXPECT_TRUE(BM(74) != BM(75));
+  EXPECT_FALSE(BM(74) > BM(75));
+  EXPECT_FALSE(BM(74) >= BM(75));
+  EXPECT_FALSE(BM(74) == BM(75));
+
+  EXPECT_TRUE(BM(75) > BM(74));
+  EXPECT_TRUE(BM(75) >= BM(74));
+  EXPECT_TRUE(BM(75) != BM(74));
+  EXPECT_FALSE(BM(75) < BM(74));
+  EXPECT_FALSE(BM(75) <= BM(74));
+  EXPECT_FALSE(BM(75) == BM(74));
+}
+
+TEST(BlockMassTest, Add) {
+  // Trivial.
+  EXPECT_EQ(BM(0), BM(0) + BM(0));
+  EXPECT_EQ(BM(1), BM(0) + BM(1));
+  EXPECT_EQ(BM(1), BM(1) + BM(0));
+  EXPECT_EQ(BM(2), BM(1) + BM(1));
+  EXPECT_EQ(BM(75), BM(6) + BM(69));
+
+  // Saturate.
+  EXPECT_TRUE((BM(UINT64_MAX) + BM(0)).isFull());
+  EXPECT_TRUE((BM(UINT64_MAX) + BM(1)).isFull());
+  EXPECT_TRUE((BM(UINT64_MAX) + BM(2)).isFull());
+  EXPECT_TRUE((BM(UINT64_MAX) + BM(UINT64_MAX)).isFull());
+  EXPECT_TRUE((BM(UINT64_MAX-5) + BM(20)).isFull());
+}
+
+TEST(BlockMassTest, Subtract) {
+  // Trivial.
+  EXPECT_EQ(BM(0), BM(0) - BM(0));
+  EXPECT_EQ(BM(1), BM(1) - BM(0));
+  EXPECT_EQ(BM(0), BM(1) - BM(1));
+  EXPECT_EQ(BM(1), BM(2) - BM(1));
+  EXPECT_EQ(BM(6), BM(75) - BM(69));
+
+  // Saturate.
+  EXPECT_TRUE((BM(0) - BM(1)).isEmpty());
+  EXPECT_TRUE((BM(0) - BM(25)).isEmpty());
+  EXPECT_TRUE((BM(0) - BM(UINT64_MAX)).isEmpty());
+  EXPECT_TRUE((BM(1) - BM(2)).isEmpty());
+  EXPECT_TRUE((BM(75) - BM(80)).isEmpty());
+  EXPECT_TRUE((BM(75) - BM(UINT64_MAX - 1)).isEmpty());
+}
+
+TEST(BlockMassTest, Multiply) {
+  // Multiply by 1.0.
+  EXPECT_EQ(BM(UINT64_MAX), BM(UINT64_MAX) * BM(UINT64_MAX));
+  EXPECT_EQ(BM(UINT64_MAX-1), BM(UINT64_MAX) * BM(UINT64_MAX-1));
+  EXPECT_EQ(BM(UINT64_MAX-20), BM(UINT64_MAX) * BM(UINT64_MAX-20));
+  EXPECT_EQ(BM(20), BM(UINT64_MAX) * BM(20));
+  EXPECT_EQ(BM(0), BM(UINT64_MAX) * BM(0));
+  EXPECT_EQ(BM(1), BM(UINT64_MAX) * BM(1));
+  EXPECT_EQ(BM(2), BM(UINT64_MAX) * BM(2));
+}
+
+TEST(BlockMassTest, MultiplyByBranchProbability) {
+  typedef BranchProbability BP;
+
+  // Multiply by 1.0.
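+  // (BranchProbability(N, D) represents the ratio N / D, so BP(1, 1) and
+  // BP(7, 7) below both denote 1.0, and multiplying by them should leave
+  // the mass unchanged.)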
+  EXPECT_EQ(BM(UINT64_MAX), BM(UINT64_MAX) * BP(1, 1));
+  EXPECT_EQ(BM(UINT64_MAX), BM(UINT64_MAX) * BP(7, 7));
+  EXPECT_EQ(BM(UINT32_MAX), BM(UINT32_MAX) * BP(1, 1));
+  EXPECT_EQ(BM(UINT32_MAX), BM(UINT32_MAX) * BP(7, 7));
+  EXPECT_EQ(BM(0), BM(0) * BP(1, 1));
+  EXPECT_EQ(BM(0), BM(0) * BP(7, 7));
+
+  // Multiply by 0.0.
+  EXPECT_EQ(BM(0), BM(UINT64_MAX) * BP(0, 1));
+  EXPECT_EQ(BM(0), BM(UINT32_MAX) * BP(0, 1));
+  EXPECT_EQ(BM(0), BM(0) * BP(0, 1));
+
+  uint64_t Two63 = UINT64_C(1) << 63;
+  uint64_t Two31 = UINT64_C(1) << 31;
+
+  // Multiply by 0.5.
+  EXPECT_EQ(BM(Two63 - 1), BM(UINT64_MAX) * BP(1, 2));
+
+  // Big fractions.
+  EXPECT_EQ(BM(1), BM(2) * BP(Two31, UINT32_MAX));
+  EXPECT_EQ(BM(Two31), BM(Two31 * 2) * BP(Two31, UINT32_MAX));
+  EXPECT_EQ(BM(Two63 + Two31), BM(UINT64_MAX) * BP(Two31, UINT32_MAX));
+
+  // High precision.
+  EXPECT_EQ(BM(UINT64_C(9223372047592194055)),
+            BM(UINT64_MAX) * BP(Two31 + 1, UINT32_MAX - 2));
+}
+
+TEST(BlockMassTest, ToFloat) {
+  typedef PositiveFloat<uint64_t> F;
+
+  EXPECT_EQ(F(1, 0), BM(UINT64_MAX).toFloat());
+  EXPECT_EQ(F(UINT64_MAX, -64), BM(UINT64_MAX - 1).toFloat());
+  EXPECT_EQ(F(790, -64), BM(789).toFloat());
+  EXPECT_EQ(F(1, -64), BM(0).toFloat());
+}
+
+}
Index: unittests/Support/CMakeLists.txt
===================================================================
--- unittests/Support/CMakeLists.txt
+++ unittests/Support/CMakeLists.txt
@@ -6,6 +6,7 @@
   AlignOfTest.cpp
   AllocatorTest.cpp
   ArrayRecyclerTest.cpp
   BlockFrequencyTest.cpp
+  BlockMassTest.cpp
   Casting.cpp
   CommandLineTest.cpp
@@ -24,6 +25,7 @@
   MemoryBufferTest.cpp
   MemoryTest.cpp
   Path.cpp
+  PositiveFloatTest.cpp
   ProcessTest.cpp
   ProgramTest.cpp
   RegexTest.cpp
Index: unittests/Support/PositiveFloatTest.cpp
===================================================================
--- /dev/null
+++ unittests/Support/PositiveFloatTest.cpp
@@ -0,0 +1,600 @@
+//===- llvm/unittest/Support/PositiveFloatTest.cpp - PositiveFloat tests -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/PositiveFloat.h"
+#include "llvm/Support/DataTypes.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace llvm {
+template <class DigitsT>
+void PrintTo(const PositiveFloat<DigitsT> &F, ::std::ostream *os) {
+  *os << F.getDigits() << "*2^" << F.getExponent();
+}
+}
+namespace {
+
+typedef PositiveFloat<uint32_t> F32;
+typedef PositiveFloat<uint64_t> F64;
+static F32 getFraction32(uint32_t N, uint32_t D) {
+  return F32::getFraction(N, D);
+}
+static F64 getFraction64(uint64_t N, uint64_t D) {
+  return F64::getFraction(N, D);
+}
+TEST(PositiveFloatTest, IsZero) {
+  EXPECT_TRUE(F32(0, 0).isZero());
+  EXPECT_TRUE(F32(0, 1).isZero());
+  EXPECT_TRUE(F32(0, -20).isZero());
+  EXPECT_FALSE(F32(1, 0).isZero());
+  EXPECT_FALSE(F32(20, -3).isZero());
+
+  EXPECT_TRUE(F64(0, 0).isZero());
+  EXPECT_TRUE(F64(0, 1).isZero());
+  EXPECT_TRUE(F64(0, -20).isZero());
+  EXPECT_FALSE(F64(1, 0).isZero());
+  EXPECT_FALSE(F64(20, -3).isZero());
+}
+TEST(PositiveFloatTest, Lg) {
+  EXPECT_EQ(0, F32(1, 0).lg());
+  EXPECT_EQ(0, F32(1, 0).lgFloor());
+  EXPECT_EQ(0, F32(1, 0).lgCeiling());
+  EXPECT_EQ(1, F32(2, 0).lg());
+  EXPECT_EQ(1, F32(2, 0).lgFloor());
+  EXPECT_EQ(1, F32(2, 0).lgCeiling());
+  EXPECT_EQ(1, F32(1, 1).lg());
+  EXPECT_EQ(1, F32(1, 1).lgFloor());
+  EXPECT_EQ(1, F32(1, 1).lgCeiling());
+  EXPECT_EQ(-1, F32(1, -1).lg());
+  EXPECT_EQ(-1, F32(2, -2).lg());
+  EXPECT_EQ(INT32_MIN, F32(0, 0).lg());
+  EXPECT_EQ(INT32_MIN, F32(0, 0).lgFloor());
+  EXPECT_EQ(INT32_MIN, F32(0, 0).lgCeiling());
+  EXPECT_EQ(INT32_MIN, F32(0, 1).lg());
+  EXPECT_EQ(INT32_MIN, F32(0, 1).lgFloor());
+  EXPECT_EQ(INT32_MIN, F32(0, 1).lgCeiling());
+  EXPECT_EQ(INT32_MIN, F32(0, -1).lg());
+  EXPECT_EQ(INT32_MIN, F32(0, -1).lgFloor());
+  EXPECT_EQ(INT32_MIN, F32(0, -1).lgCeiling());
+  EXPECT_EQ(3, F32(64, -3).lg());
+  EXPECT_EQ(3, F32(64, -3).lgFloor());
+  EXPECT_EQ(3, F32(64, -3).lgCeiling());
+  EXPECT_EQ(3, F32(8, 0).lg());
+  EXPECT_EQ(3, F32(8, 0).lgFloor());
+  EXPECT_EQ(3, F32(8, 0).lgCeiling());
+  EXPECT_EQ(3, F32(1, 3).lg());
+  EXPECT_EQ(3, F32(1, 3).lgFloor());
+  EXPECT_EQ(3, F32(1, 3).lgCeiling());
+  EXPECT_EQ(3, F32(7, 0).lg());
+  EXPECT_EQ(2, F32(7, 0).lgFloor());
+  EXPECT_EQ(3, F32(7, 0).lgCeiling());
+  EXPECT_EQ(3, F32(9, 0).lg());
+  EXPECT_EQ(3, F32(9, 0).lgFloor());
+  EXPECT_EQ(4, F32(9, 0).lgCeiling());
+
+  EXPECT_EQ(0, F64(1, 0).lg());
+  EXPECT_EQ(0, F64(1, 0).lgFloor());
+  EXPECT_EQ(0, F64(1, 0).lgCeiling());
+  EXPECT_EQ(1, F64(2, 0).lg());
+  EXPECT_EQ(1, F64(2, 0).lgFloor());
+  EXPECT_EQ(1, F64(2, 0).lgCeiling());
+  EXPECT_EQ(1, F64(1, 1).lg());
+  EXPECT_EQ(1, F64(1, 1).lgFloor());
+  EXPECT_EQ(1, F64(1, 1).lgCeiling());
+  EXPECT_EQ(-1, F64(1, -1).lg());
+  EXPECT_EQ(-1, F64(2, -2).lg());
+  EXPECT_EQ(INT32_MIN, F64(0, 0).lg());
+  EXPECT_EQ(INT32_MIN, F64(0, 0).lgFloor());
+  EXPECT_EQ(INT32_MIN, F64(0, 0).lgCeiling());
+  EXPECT_EQ(INT32_MIN, F64(0, 1).lg());
+  EXPECT_EQ(INT32_MIN, F64(0, 1).lgFloor());
+  EXPECT_EQ(INT32_MIN, F64(0, 1).lgCeiling());
+  EXPECT_EQ(INT32_MIN, F64(0, -1).lg());
+  EXPECT_EQ(INT32_MIN, F64(0, -1).lgFloor());
+  EXPECT_EQ(INT32_MIN, F64(0, -1).lgCeiling());
+  EXPECT_EQ(3, F64(64, -3).lg());
+  EXPECT_EQ(3, F64(64, -3).lgFloor());
+  EXPECT_EQ(3, F64(64, -3).lgCeiling());
+  EXPECT_EQ(3, F64(8, 0).lg());
+  EXPECT_EQ(3, F64(8, 0).lgFloor());
+  EXPECT_EQ(3, F64(8, 0).lgCeiling());
+  EXPECT_EQ(3, F64(1, 3).lg());
+  EXPECT_EQ(3, F64(1, 3).lgFloor());
+  EXPECT_EQ(3, F64(1, 3).lgCeiling());
+  EXPECT_EQ(3, F64(7, 0).lg());
+  EXPECT_EQ(2,
F64(7, 0).lgFloor()); + EXPECT_EQ(3, F64(7, 0).lgCeiling()); + EXPECT_EQ(3, F64(9, 0).lg()); + EXPECT_EQ(3, F64(9, 0).lgFloor()); + EXPECT_EQ(4, F64(9, 0).lgCeiling()); +} +TEST(PositiveFloatTest, Compare) { + EXPECT_EQ(0, F32(0, 0).compare(F32(0, 1))); + EXPECT_EQ(0, F32(0, 0).compare(F32(0, -10))); + EXPECT_EQ(0, F32(0, 0).compare(F32(0, 20))); + EXPECT_EQ(0, F32(8, 0).compare(F32(64, -3))); + EXPECT_EQ(0, F32(8, 0).compare(F32(32, -2))); + EXPECT_EQ(0, F32(8, 0).compare(F32(16, -1))); + EXPECT_EQ(0, F32(8, 0).compare(F32(8, 0))); + EXPECT_EQ(0, F32(8, 0).compare(F32(4, 1))); + EXPECT_EQ(0, F32(8, 0).compare(F32(2, 2))); + EXPECT_EQ(0, F32(8, 0).compare(F32(1, 3))); + EXPECT_EQ(-1, F32(0, 0).compare(F32(1, 3))); + EXPECT_EQ(-1, F32(7, 0).compare(F32(1, 3))); + EXPECT_EQ(-1, F32(7, 0).compare(F32(64, -3))); + EXPECT_EQ(1, F32(9, 0).compare(F32(1, 3))); + EXPECT_EQ(1, F32(9, 0).compare(F32(64, -3))); + EXPECT_EQ(1, F32(9, 0).compare(F32(0, 0))); + + EXPECT_EQ(0, F64(0, 0).compare(F64(0, 1))); + EXPECT_EQ(0, F64(0, 0).compare(F64(0, -10))); + EXPECT_EQ(0, F64(0, 0).compare(F64(0, 20))); + EXPECT_EQ(0, F64(8, 0).compare(F64(64, -3))); + EXPECT_EQ(0, F64(8, 0).compare(F64(32, -2))); + EXPECT_EQ(0, F64(8, 0).compare(F64(16, -1))); + EXPECT_EQ(0, F64(8, 0).compare(F64(8, 0))); + EXPECT_EQ(0, F64(8, 0).compare(F64(4, 1))); + EXPECT_EQ(0, F64(8, 0).compare(F64(2, 2))); + EXPECT_EQ(0, F64(8, 0).compare(F64(1, 3))); + EXPECT_EQ(-1, F64(0, 0).compare(F64(1, 3))); + EXPECT_EQ(-1, F64(7, 0).compare(F64(1, 3))); + EXPECT_EQ(-1, F64(7, 0).compare(F64(64, -3))); + EXPECT_EQ(1, F64(9, 0).compare(F64(1, 3))); + EXPECT_EQ(1, F64(9, 0).compare(F64(64, -3))); + EXPECT_EQ(1, F64(9, 0).compare(F64(0, 0))); + EXPECT_EQ(-1, F64(UINT64_MAX, 0).compare(F64(1, 64))); +} +TEST(PositiveFloatTest, ComparisonOperators) { + EXPECT_EQ(F32(0, 0), F32(0, 1)); + EXPECT_EQ(F32(0, 0), F32(0, -10)); + EXPECT_EQ(F32(0, 0), F32(0, 20)); + EXPECT_EQ(F32(8, 0), F32(64, -3)); + EXPECT_EQ(F32(8, 0), F32(1, 3)); + EXPECT_LE(F32(0, 0), F32(0, 1)); + EXPECT_LE(F32(0, 0), F32(0, -10)); + EXPECT_LE(F32(0, 0), F32(0, 20)); + EXPECT_LE(F32(8, 0), F32(64, -3)); + EXPECT_LE(F32(8, 0), F32(1, 3)); + EXPECT_GE(F32(0, 0), F32(0, 1)); + EXPECT_GE(F32(0, 0), F32(0, -10)); + EXPECT_GE(F32(0, 0), F32(0, 20)); + EXPECT_GE(F32(8, 0), F32(64, -3)); + EXPECT_GE(F32(8, 0), F32(1, 3)); + EXPECT_LT(F32(0, 0), F32(1, 3)); + EXPECT_LT(F32(7, 0), F32(1, 3)); + EXPECT_LT(F32(7, 0), F32(64, -3)); + EXPECT_LE(F32(0, 0), F32(1, 3)); + EXPECT_LE(F32(7, 0), F32(1, 3)); + EXPECT_LE(F32(7, 0), F32(64, -3)); + EXPECT_NE(F32(0, 0), F32(1, 3)); + EXPECT_NE(F32(7, 0), F32(1, 3)); + EXPECT_NE(F32(7, 0), F32(64, -3)); + EXPECT_GT(F32(9, 0), F32(1, 3)); + EXPECT_GT(F32(9, 0), F32(64, -3)); + EXPECT_GT(F32(9, 0), F32(0, 0)); + EXPECT_GE(F32(9, 0), F32(1, 3)); + EXPECT_GE(F32(9, 0), F32(64, -3)); + EXPECT_GE(F32(9, 0), F32(0, 0)); + EXPECT_NE(F32(9, 0), F32(1, 3)); + EXPECT_NE(F32(9, 0), F32(64, -3)); + EXPECT_NE(F32(9, 0), F32(0, 0)); + + EXPECT_EQ(F64(0, 0), F64(0, 1)); + EXPECT_EQ(F64(0, 0), F64(0, -10)); + EXPECT_EQ(F64(0, 0), F64(0, 20)); + EXPECT_EQ(F64(8, 0), F64(64, -3)); + EXPECT_EQ(F64(8, 0), F64(1, 3)); + EXPECT_LE(F64(0, 0), F64(0, 1)); + EXPECT_LE(F64(0, 0), F64(0, -10)); + EXPECT_LE(F64(0, 0), F64(0, 20)); + EXPECT_LE(F64(8, 0), F64(64, -3)); + EXPECT_LE(F64(8, 0), F64(1, 3)); + EXPECT_GE(F64(0, 0), F64(0, 1)); + EXPECT_GE(F64(0, 0), F64(0, -10)); + EXPECT_GE(F64(0, 0), F64(0, 20)); + EXPECT_GE(F64(8, 0), F64(64, -3)); + EXPECT_GE(F64(8, 0), F64(1, 3)); + 
EXPECT_LT(F64(0, 0), F64(1, 3)); + EXPECT_LT(F64(7, 0), F64(1, 3)); + EXPECT_LT(F64(7, 0), F64(64, -3)); + EXPECT_LE(F64(0, 0), F64(1, 3)); + EXPECT_LE(F64(7, 0), F64(1, 3)); + EXPECT_LE(F64(7, 0), F64(64, -3)); + EXPECT_NE(F64(0, 0), F64(1, 3)); + EXPECT_NE(F64(7, 0), F64(1, 3)); + EXPECT_NE(F64(7, 0), F64(64, -3)); + EXPECT_GT(F64(9, 0), F64(1, 3)); + EXPECT_GT(F64(9, 0), F64(64, -3)); + EXPECT_GT(F64(9, 0), F64(0, 0)); + EXPECT_GE(F64(9, 0), F64(1, 3)); + EXPECT_GE(F64(9, 0), F64(64, -3)); + EXPECT_GE(F64(9, 0), F64(0, 0)); + EXPECT_NE(F64(9, 0), F64(1, 3)); + EXPECT_NE(F64(9, 0), F64(64, -3)); + EXPECT_NE(F64(9, 0), F64(0, 0)); +} + +TEST(PositiveFloatTest, Shift) { + EXPECT_EQ(F32(0, 0), F32(0, 0) >> 1); + EXPECT_EQ(F32(0, 0), F32(0, 0) << 1); + EXPECT_EQ(F32(0, 0), F32(0, 0) >> 47); + EXPECT_EQ(F32(0, 0), F32(0, 0) << 47); + EXPECT_EQ(F32(0, 0), F32(0, 0) >> -47); + EXPECT_EQ(F32(0, 0), F32(0, 0) << -47); + EXPECT_EQ(F32(1, -1), F32(1, 0) >> 1); + EXPECT_EQ(F32(1, 1), F32(1, 0) << 1); + EXPECT_EQ(F32(1, -47), F32(1, 0) >> 47); + EXPECT_EQ(F32(1, 47), F32(1, 0) << 47); + EXPECT_EQ(F32(1, 47), F32(1, 0) >> -47); + EXPECT_EQ(F32(1, -47), F32(1, 0) << -47); + + EXPECT_EQ(F64(0, 0), F64(0, 0) >> 1); + EXPECT_EQ(F64(0, 0), F64(0, 0) << 1); + EXPECT_EQ(F64(0, 0), F64(0, 0) >> 47); + EXPECT_EQ(F64(0, 0), F64(0, 0) << 47); + EXPECT_EQ(F64(0, 0), F64(0, 0) >> -47); + EXPECT_EQ(F64(0, 0), F64(0, 0) << -47); + EXPECT_EQ(F64(1, -1), F64(1, 0) >> 1); + EXPECT_EQ(F64(1, 1), F64(1, 0) << 1); + EXPECT_EQ(F64(1, -47), F64(1, 0) >> 47); + EXPECT_EQ(F64(1, 47), F64(1, 0) << 47); + EXPECT_EQ(F64(1, 47), F64(1, 0) >> -47); + EXPECT_EQ(F64(1, -47), F64(1, 0) << -47); +} +TEST(PositiveFloatTest, Add) { + // Zero. + EXPECT_EQ(F32(1, 0), F32(0, 0) + F32(1, 0)); + EXPECT_EQ(F32(1, 0), F32(0, 0) + F32(8, -3)); + EXPECT_EQ(F32(~0u, 0), F32(0, 0) + F32(~0u, 0)); + + // Basic. + EXPECT_EQ(F32(2, 0), F32(1, 0) + F32(1, 0)); + EXPECT_EQ(F32(3, 0), F32(1, 0) + F32(2, 0)); + EXPECT_EQ(F32(67, 0), F32(7, 0) + F32(60, 0)); + + // Different exponents. + EXPECT_EQ(F32(3, 0), F32(1, 0) + F32(1, 1)); + EXPECT_EQ(F32(4, 0), F32(2, 0) + F32(1, 1)); + + // Loss of precision. + EXPECT_EQ(F32(1, 32), F32(1, 32) + F32(1, 0)); + EXPECT_EQ(F32(1, 0), F32(1, -32) + F32(1, 0)); + + // Not quite loss of precision. + EXPECT_EQ(F32((1u<<31) + 1, 1), F32(1, 32) + F32(1, 1)); + EXPECT_EQ(F32((1u<<31) + 1, -32), F32(1, -32) + F32(1, -1)); + + // Overflow. + EXPECT_EQ(F32(1, 32), F32(1, 0) + F32(~0u, 0)); + + // Reverse operand order. + EXPECT_EQ(F32(1, 0), F32(1, 0) + F32(0, 0)); + EXPECT_EQ(F32(1, 0), F32(8, -3) + F32(0, 0)); + EXPECT_EQ(F32(~0u, 0), F32(~0u, 0) + F32(0, 0)); + EXPECT_EQ(F32(2, 0), F32(1, 0) + F32(1, 0)); + EXPECT_EQ(F32(3, 0), F32(2, 0) + F32(1, 0)); + EXPECT_EQ(F32(67, 0), F32(60, 0) + F32(7, 0)); + EXPECT_EQ(F32(3, 0), F32(1, 1) + F32(1, 0)); + EXPECT_EQ(F32(4, 0), F32(1, 1) + F32(2, 0)); + EXPECT_EQ(F32(1, 32), F32(1, 0) + F32(1, 32)); + EXPECT_EQ(F32(1, 0), F32(1, 0) + F32(1, -32)); + EXPECT_EQ(F32((1u<<31) + 1, 1), F32(1, 1) + F32(1, 32)); + EXPECT_EQ(F32((1u<<31) + 1, -32), F32(1, -1) + F32(1, -32)); + EXPECT_EQ(F32(1, 32), F32(~0u, 0) + F32(1, 0)); + + // Zero. + EXPECT_EQ(F64(1, 0), F64(0, 0) + F64(1, 0)); + EXPECT_EQ(F64(1, 0), F64(0, 0) + F64(8, -3)); + EXPECT_EQ(F64(~0u, 0), F64(0, 0) + F64(~0u, 0)); + + // Basic. + EXPECT_EQ(F64(2, 0), F64(1, 0) + F64(1, 0)); + EXPECT_EQ(F64(3, 0), F64(1, 0) + F64(2, 0)); + EXPECT_EQ(F64(67, 0), F64(7, 0) + F64(60, 0)); + + // Different exponents. 
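+  // (To line the addends up, the operand digits are shifted to a common
+  // exponent before adding, e.g. 1*2^0 + 1*2^1 = 3*2^0 below; bits that
+  // fall outside the 64-bit digit window are dropped, which is what the
+  // loss-of-precision cases exercise.)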
+ EXPECT_EQ(F64(3, 0), F64(1, 0) + F64(1, 1)); + EXPECT_EQ(F64(4, 0), F64(2, 0) + F64(1, 1)); + + // Loss of precision. + EXPECT_EQ(F64(1, 64), F64(1, 64) + F64(1, 0)); + EXPECT_EQ(F64(1, 0), F64(1, -64) + F64(1, 0)); + + // Not quite loss of precision. + EXPECT_EQ(F64((1ull<<63) + 1, 1), F64(1, 64) + F64(1, 1)); + EXPECT_EQ(F64((1ull<<63) + 1, -64), F64(1, -64) + F64(1, -1)); + + // Overflow. + EXPECT_EQ(F64(1, 64), F64(1, 0) + F64(~0ull, 0)); + + // Reverse operand order. + EXPECT_EQ(F64(1, 0), F64(1, 0) + F64(0, 0)); + EXPECT_EQ(F64(1, 0), F64(8, -3) + F64(0, 0)); + EXPECT_EQ(F64(~0u, 0), F64(~0u, 0) + F64(0, 0)); + EXPECT_EQ(F64(2, 0), F64(1, 0) + F64(1, 0)); + EXPECT_EQ(F64(3, 0), F64(2, 0) + F64(1, 0)); + EXPECT_EQ(F64(67, 0), F64(60, 0) + F64(7, 0)); + EXPECT_EQ(F64(3, 0), F64(1, 1) + F64(1, 0)); + EXPECT_EQ(F64(4, 0), F64(1, 1) + F64(2, 0)); + EXPECT_EQ(F64(1, 64), F64(1, 0) + F64(1, 64)); + EXPECT_EQ(F64(1, 0), F64(1, 0) + F64(1, -64)); + EXPECT_EQ(F64((1ull<<63) + 1, 1), F64(1, 1) + F64(1, 64)); + EXPECT_EQ(F64((1ull<<63) + 1, -64), F64(1, -1) + F64(1, -64)); + EXPECT_EQ(F64(1, 64), F64(~0ull, 0) + F64(1, 0)); +} +TEST(PositiveFloatTest, Subtract) { + // Basic. + EXPECT_EQ(F32(0, 0), F32(1, 0) - F32(1, 0)); + EXPECT_EQ(F32(1, 0), F32(2, 0) - F32(1, 0)); + EXPECT_EQ(F32(53, 0), F32(60, 0) - F32(7, 0)); + + // Equals "0", different exponents. + EXPECT_EQ(F32(0, 0), F32(2, 0) - F32(1, 1)); + + // Subtract "0". + EXPECT_EQ(F32(1, 0), F32(1, 0) - F32(0, 0)); + EXPECT_EQ(F32(8, -3), F32(8, -3) - F32(0, 0)); + EXPECT_EQ(F32(~0u, 0), F32(~0u, 0) - F32(0, 0)); + + // Loss of precision. + EXPECT_EQ(F32((1u<<31) + 1, 1), F32((1u<<31)+1, 1) - F32(1, 0)); + EXPECT_EQ(F32((1u<<31) + 1, -31), F32((1u<<31)+1, -31) - F32(1, -32)); + + // Not quite loss of precision. + EXPECT_EQ(F32(UINT32_MAX, 0), F32(1, 32) - F32(1, 0)); + EXPECT_EQ(F32(UINT32_MAX, -32), F32(1, 0) - F32(1, -32)); + + // Saturate to "0". + EXPECT_EQ(F32(0, 0), F32(0, 0) - F32(1, 0)); + EXPECT_EQ(F32(0, 0), F32(0, 0) - F32(8, -3)); + EXPECT_EQ(F32(0, 0), F32(0, 0) - F32(~0u, 0)); + EXPECT_EQ(F32(0, 0), F32(7, 0) - F32(60, 0)); + EXPECT_EQ(F32(0, 0), F32(1, 0) - F32(1, 1)); + EXPECT_EQ(F32(0, 0), F32(1, -32) - F32(1, 0)); + EXPECT_EQ(F32(0, 0), F32(1, -32) - F32(1, -1)); + + // Regression tests for cases that failed during bringup. + EXPECT_EQ(F32(1, -5), F32(1, 0) - F32(4160749568u, -32)); + + // Basic. + EXPECT_EQ(F64(0, 0), F64(1, 0) - F64(1, 0)); + EXPECT_EQ(F64(1, 0), F64(2, 0) - F64(1, 0)); + EXPECT_EQ(F64(53, 0), F64(60, 0) - F64(7, 0)); + + // Equals "0", different exponents. + EXPECT_EQ(F64(0, 0), F64(2, 0) - F64(1, 1)); + + // Subtract "0". + EXPECT_EQ(F64(1, 0), F64(1, 0) - F64(0, 0)); + EXPECT_EQ(F64(8, -3), F64(8, -3) - F64(0, 0)); + EXPECT_EQ(F64(~0u, 0), F64(~0u, 0) - F64(0, 0)); + + // Loss of precision. + EXPECT_EQ(F64((1ull<<63) + 1, 1), F64((1ull<<63)+1, 1) - F64(1, 0)); + EXPECT_EQ(F64((1ull<<63) + 1, -63), F64((1ull<<63)+1, -63) - F64(1, -64)); + + // Not quite loss of precision. + EXPECT_EQ(F64(UINT64_MAX, 0), F64(1, 64) - F64(1, 0)); + EXPECT_EQ(F64(UINT64_MAX, -64), F64(1, 0) - F64(1, -64)); + + // Saturate to "0". + EXPECT_EQ(F64(0, 0), F64(0, 0) - F64(1, 0)); + EXPECT_EQ(F64(0, 0), F64(0, 0) - F64(8, -3)); + EXPECT_EQ(F64(0, 0), F64(0, 0) - F64(~0u, 0)); + EXPECT_EQ(F64(0, 0), F64(7, 0) - F64(60, 0)); + EXPECT_EQ(F64(0, 0), F64(1, 0) - F64(1, 1)); + EXPECT_EQ(F64(0, 0), F64(1, -64) - F64(1, 0)); + EXPECT_EQ(F64(0, 0), F64(1, -64) - F64(1, -1)); +} +TEST(PositiveFloatTest, Multiply) { + // Zero. 
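+  // (A zero mantissa makes the product zero regardless of the other
+  // operand's digits or exponent.)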
+ EXPECT_EQ(F32(0, 0), F32(0, 0) * F32(0, 0)); + EXPECT_EQ(F32(0, 0), F32(0, 0) * F32(1, 0)); + EXPECT_EQ(F32(0, 0), F32(0, 0) * F32(33, -60)); + + // Basic. + EXPECT_EQ(F32(6, 0), F32(2, 0) * F32(3, 0)); + EXPECT_EQ(F32(UINT16_MAX/3 * UINT16_MAX/5*2, 0), + F32(UINT16_MAX/3, 0) * F32(UINT16_MAX/5*2, 0)); + + // Powers of two. + EXPECT_EQ(F32(1, 0), F32(1, 0) * F32(1, 0)); + EXPECT_EQ(F32(3, 1), F32(1, 0) * F32(3, 1)); + EXPECT_EQ(F32(3, 5), F32(1, 4) * F32(3, 1)); + EXPECT_EQ(F32(3, -2), F32(1, -3) * F32(3, 1)); + EXPECT_EQ(F32(33, -60), F32(1, 0) * F32(33, -60)); + EXPECT_EQ(F32(33, -61), F32(1, -1) * F32(33, -60)); + EXPECT_EQ(F32(33, 12), F32(1, 72) * F32(33, -60)); + EXPECT_EQ(F32(33, 12), F32(2, 71) * F32(33, -60)); + + // Overflow, no loss of precision. + // ==> (0xf00010 * 0x1001) * 2^(3-2) + // ==> (0xf00f00000 + 0x10010) * 2^1 + // ==> (0xf00f10010) * 2^1 + // ==> (0xf00f1001) * 2^5 + EXPECT_EQ(F32(0xf00f1001, 5), + F32(0xf00010, 3) * F32(0x1001, -2)); + + // Overflow, loss of precision, rounds down. + // ==> (0xf000070 * 0x1001) * 2^(3-2) + // ==> (0xf00f000000 + 0x70070) * 2^1 + // ==> (0xf00f070070) * 2^1 + // ==> (0xf00f0700) * 2^9 + EXPECT_EQ(F32(0xf00f0700, 9), + F32(0xf000070, 3) * F32(0x1001, -2)); + + // Overflow, loss of precision, rounds up. + // ==> (0xf000080 * 0x1001) * 2^(3-2) + // ==> (0xf00f000000 + 0x80080) * 2^1 + // ==> (0xf00f080080) * 2^1 + // ==> (0xf00f0801) * 2^9 + EXPECT_EQ(F32(0xf00f0801, 9), + F32(0xf000080, 3) * F32(0x1001, -2)); + + // Reverse operand order. + EXPECT_EQ(F32(0, 0), F32(1, 0) * F32(0, 0)); + EXPECT_EQ(F32(0, 0), F32(33, -60) * F32(0, 0)); + EXPECT_EQ(F32(6, 0), F32(3, 0) * F32(2, 0)); + EXPECT_EQ(F32(UINT16_MAX/3 * UINT16_MAX/5*2, 0), + F32(UINT16_MAX/5*2, 0) * F32(UINT16_MAX/3, 0)); + EXPECT_EQ(F32(3, 1), F32(3, 1) * F32(1, 0)); + EXPECT_EQ(F32(3, 5), F32(3, 1) * F32(1, 4)); + EXPECT_EQ(F32(3, -2), F32(3, 1) * F32(1, -3)); + EXPECT_EQ(F32(33, -60), F32(33, -60) * F32(1, 0)); + EXPECT_EQ(F32(33, -61), F32(33, -60) * F32(1, -1)); + EXPECT_EQ(F32(33, 12), F32(33, -60) * F32(1, 72)); + EXPECT_EQ(F32(33, 12), F32(33, -60) * F32(2, 71)); + EXPECT_EQ(F32(0xf00f1001, 5), + F32(0x1001, 3) * F32(0xf00010, -2)); + EXPECT_EQ(F32(0xf00f0700, 9), + F32(0x1001, 3) * F32(0xf000070, -2)); + EXPECT_EQ(F32(0xf00f0801, 9), + F32(0x1001, 3) * F32(0xf000080, -2)); + + // Round to overflow. + EXPECT_EQ(F64(1, 127), + F64(UINT64_C(10376293541461622786), 0) * + F64(UINT64_C(16397105843297379211), 0)); + + // Big number with rounding. + EXPECT_EQ(F64(UINT64_C(9223372036854775810), -51), + F64(UINT64_C(18446744073709551556), -60) * + F64(UINT64_C(9223372036854775840),-55)); +} +TEST(PositiveFloatTest, Divide) { + // Zero. + EXPECT_EQ(F32(0, 0), F32(0, 0) / F32(0, 0)); + EXPECT_EQ(F32(0, 0), F32(0, 0) / F32(1, 0)); + EXPECT_EQ(F32(0, 0), F32(0, 0) / F32(73, -3)); + EXPECT_EQ(F32::getLargest(), F32(1, 0) / F32(0, 0)); + EXPECT_EQ(F32::getLargest(), F32(6, 2) / F32(0, 0)); + + // Powers of two. + EXPECT_EQ(F32(1, 0), F32(1, 0) / F32(1, 0)); + EXPECT_EQ(F32(1, -1), F32(1, 0) / F32(1, 1)); + EXPECT_EQ(F32(1, -1), F32(1, -1) / F32(1, 0)); + EXPECT_EQ(F32(1, 1), F32(1, 0) / F32(1, -1)); + EXPECT_EQ(F32(1, 1), F32(1, 1) / F32(1, 0)); + EXPECT_EQ(F32(1, -8), F32(4, 2) / F32(16, 8)); + + // Divide by a power of two. 
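+  // (Dividing by a power of two should leave the digits untouched and only
+  // adjust the exponent: 7*2^3 / 2^1 = 7*2^2.)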
+ EXPECT_EQ(F32(7, 3), F32(7, 3) / F32(1, 0)); + EXPECT_EQ(F32(7, 2), F32(7, 3) / F32(2, 0)); + EXPECT_EQ(F32(7, 2), F32(7, 3) / F32(1, 1)); + EXPECT_EQ(F32(7, 4), F32(7, 3) / F32(1, -1)); + EXPECT_EQ(F32(7, 0), F32(7, 3) / F32(16, -1)); + EXPECT_EQ(F32(7, 7), F32(7, 3) / F32(16, -8)); + EXPECT_EQ(F32(7, -9), F32(7, 3) / F32(16, 8)); + + // Divide evenly. + EXPECT_EQ(F32(3, 0), F32(9, 0) / F32(3, 0)); + EXPECT_EQ(F32(9, 0), F32(63, 0) / F32(7, 0)); + + // Zero. + EXPECT_EQ(F64(0, 0), F64(0, 0) / F64(0, 0)); + EXPECT_EQ(F64(0, 0), F64(0, 0) / F64(1, 0)); + EXPECT_EQ(F64(0, 0), F64(0, 0) / F64(73, -3)); + EXPECT_EQ(F64::getLargest(), F64(1, 0) / F64(0, 0)); + EXPECT_EQ(F64::getLargest(), F64(6, 2) / F64(0, 0)); + + // Powers of two. + EXPECT_EQ(F64(1, 0), F64(1, 0) / F64(1, 0)); + EXPECT_EQ(F64(1, -1), F64(1, 0) / F64(1, 1)); + EXPECT_EQ(F64(1, -1), F64(1, -1) / F64(1, 0)); + EXPECT_EQ(F64(1, 1), F64(1, 0) / F64(1, -1)); + EXPECT_EQ(F64(1, 1), F64(1, 1) / F64(1, 0)); + EXPECT_EQ(F64(1, -8), F64(4, 2) / F64(16, 8)); + + // Divide by a power of two. + EXPECT_EQ(F64(7, 3), F64(7, 3) / F64(1, 0)); + EXPECT_EQ(F64(7, 2), F64(7, 3) / F64(2, 0)); + EXPECT_EQ(F64(7, 2), F64(7, 3) / F64(1, 1)); + EXPECT_EQ(F64(7, 4), F64(7, 3) / F64(1, -1)); + EXPECT_EQ(F64(7, 0), F64(7, 3) / F64(16, -1)); + EXPECT_EQ(F64(7, 7), F64(7, 3) / F64(16, -8)); + EXPECT_EQ(F64(7, -9), F64(7, 3) / F64(16, 8)); + + // Divide evenly. + EXPECT_EQ(F64(3, 0), F64(9, 0) / F64(3, 0)); + EXPECT_EQ(F64(9, 0), F64(63, 0) / F64(7, 0)); +} + +TEST(PositiveFloatTest, ToString) { + // Basic. + EXPECT_EQ("0.0", F32(0, 0).toString()); + EXPECT_EQ("1.0", F32(1, 0).toString()); + EXPECT_EQ("23.0", F32(23, 0).toString()); + EXPECT_EQ("9999999.0", F32(9999999, 0).toString()); + + // Some exponents. + EXPECT_EQ("16.0", F32(1, 4).toString()); + EXPECT_EQ("92.0", F32(23, 2).toString()); + EXPECT_EQ("19999998.0", F32(9999999, 1).toString()); + + // Fractional powers of two. + EXPECT_EQ("0.5", F32(1, -1).toString()); + EXPECT_EQ("0.25", F32(1, -2).toString()); + EXPECT_EQ("0.125", F32(1, -3).toString()); + + // Other fractions. + EXPECT_EQ("0.33333333", getFraction32(1, 3).toString(8)); + EXPECT_EQ("0.333333333", getFraction32(1, 3).toString(9)); + EXPECT_EQ("0.66666667", getFraction32(2, 3).toString(8)); + EXPECT_EQ("0.666666667", getFraction32(2, 3).toString(9)); + EXPECT_EQ("2.0", getFraction32(UINT32_MAX, INT32_MAX+1u).toString(8)); + EXPECT_EQ("0.2", getFraction32(1, 5).toString(9)); + EXPECT_EQ("0.1", getFraction32(1, 10).toString(9)); + + // Very large and small numbers. + EXPECT_EQ("3.777893186E+22", F32(1, 75).toString(10)); + EXPECT_EQ("2.710505431E-20", F32(1, -65).toString(10)); + + // Basic. + EXPECT_EQ("0.0", F64(0, 0).toString()); + EXPECT_EQ("1.0", F64(1, 0).toString()); + EXPECT_EQ("23.0", F64(23, 0).toString()); + EXPECT_EQ("9999999.0", F64(9999999, 0).toString()); + + // Some exponents. + EXPECT_EQ("16.0", F64(1, 4).toString()); + EXPECT_EQ("92.0", F64(23, 2).toString()); + EXPECT_EQ("19999998.0", F64(9999999, 1).toString()); + + // Fractional powers of two. + EXPECT_EQ("0.5", F64(1, -1).toString()); + EXPECT_EQ("0.25", F64(1, -2).toString()); + EXPECT_EQ("0.125", F64(1, -3).toString()); + + // Other fractions. 
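+  // (The integer argument to toString() below is the number of significant
+  // digits to print.)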
+ EXPECT_EQ("0.33333333", getFraction64(1, 3).toString(8)); + EXPECT_EQ("0.333333333", getFraction64(1, 3).toString(9)); + EXPECT_EQ("0.66666667", getFraction64(2, 3).toString(8)); + EXPECT_EQ("0.666666667", getFraction64(2, 3).toString(9)); + EXPECT_EQ("2.0", getFraction64(UINT64_MAX, INT64_MAX+1ull).toString(8)); + EXPECT_EQ("0.2", getFraction64(1, 5).toString(9)); + EXPECT_EQ("0.1", getFraction64(1, 10).toString(9)); + + // Very large and small numbers. + EXPECT_EQ("3.777893186E+22", F64(1, 75).toString(10)); + EXPECT_EQ("2.710505431E-20", F64(1, -65).toString(10)); + + // Some things that round up to small numbers of digits. + EXPECT_EQ("1.0", F64(UINT64_MAX, -64).toString()); + EXPECT_EQ("0.5", F64(UINT64_MAX, -65).toString()); + EXPECT_EQ("0.25", F64(UINT64_MAX, -66).toString()); + EXPECT_EQ("0.125", F64(UINT64_MAX, -67).toString()); + + // Regression tests for cases that failed during bringup. + EXPECT_EQ("0.02552960407", F64(0x06891bae8e52bf06, -64).toString()); + EXPECT_EQ("64048012001.0", + F64(UINT64_C(17192757307381903260), -28).toString()); +} + +}
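For anyone experimenting with these semantics outside the patch, here is a
minimal standalone sketch of the digits*2^exponent representation that the
PositiveFloat tests above pin down. ToyFloat, canonicalize, and equals are
illustrative names invented for this note, not LLVM API; the only behavior
assumed is what the Compare and ComparisonOperators tests show (a value is
digits * 2^exponent, and every zero compares equal regardless of exponent).

  #include <cassert>
  #include <cstdint>

  struct ToyFloat {
    uint64_t Digits; // Unsigned mantissa.
    int Exponent;    // Power-of-two scale.
  };

  // Strip trailing zero bits so that equal values get identical
  // representations, mirroring why F64(8, 0) == F64(64, -3) above.
  static ToyFloat canonicalize(ToyFloat F) {
    if (F.Digits == 0)
      return {0, 0}; // All zeros are the same zero.
    while ((F.Digits & 1) == 0) {
      F.Digits >>= 1;
      ++F.Exponent;
    }
    return F;
  }

  static bool equals(ToyFloat A, ToyFloat B) {
    A = canonicalize(A);
    B = canonicalize(B);
    return A.Digits == B.Digits && A.Exponent == B.Exponent;
  }

  int main() {
    assert(equals({8, 0}, {64, -3}));  // 8*2^0 == 64*2^-3
    assert(equals({0, 20}, {0, -10})); // Zero, whatever the exponent.
    assert(!equals({7, 0}, {1, 3}));   // 7 != 8.
    return 0;
  }

Any C++11 compiler will build this; the asserts correspond to
EXPECT_EQ(F64(8, 0), F64(64, -3)) and friends in the tests above.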