diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.h @@ -581,6 +581,7 @@ /// Used as the result type for the variable value dataflow problem. using LiveInsT = SmallVector, 8>; + MachineDominatorTree *DomTree; const TargetRegisterInfo *TRI; const TargetInstrInfo *TII; const TargetFrameLowering *TFI; @@ -728,8 +729,19 @@ /// live-out arrays to the (initialized to zero) multidimensional arrays in /// \p MInLocs and \p MOutLocs. The outer dimension is indexed by block /// number, the inner by LocIdx. - void mlocDataflow(ValueIDNum **MInLocs, ValueIDNum **MOutLocs, - SmallVectorImpl &MLocTransfer); + void buildMLocValueMap(MachineFunction &MF, ValueIDNum **MInLocs, + ValueIDNum **MOutLocs, + SmallVectorImpl &MLocTransfer); + + /// Calculate the iterated-dominance-frontier for a set of defs, using the + /// existing LLVM facilities for this. Works for a single "value" or + /// machine/variable location. + /// \p AllBlocks Set of blocks where we might consume the value. + /// \p DefBlocks Set of blocks where the value/location is defined. + /// \p PHIBlocks Output set of blocks where PHIs must be placed. + void BlockPHIPlacement(const SmallPtrSetImpl &AllBlocks, + const SmallPtrSetImpl &DefBlocks, + SmallVectorImpl &PHIBlocks); /// Perform a control flow join (lattice value meet) of the values in machine /// locations at \p MBB. Follows the algorithm described in the file-comment, @@ -738,16 +750,15 @@ /// \p InLocs. \returns two bools -- the first indicates whether a change /// was made, the second whether a lattice downgrade occurred. If the latter /// is true, revisiting this block is necessary. - std::tuple - mlocJoin(MachineBasicBlock &MBB, - SmallPtrSet &Visited, - ValueIDNum **OutLocs, ValueIDNum *InLocs); + bool mlocJoin(MachineBasicBlock &MBB, + SmallPtrSet &Visited, + ValueIDNum **OutLocs, ValueIDNum *InLocs); /// Solve the variable value dataflow problem, for a single lexical scope. /// Uses the algorithm from the file comment to resolve control flow joins, /// although there are extra hacks, see vlocJoin. Reads the /// locations of values from the \p MInLocs and \p MOutLocs arrays (see - /// mlocDataflow) and reads the variable values transfer function from + /// buildMLocValueMap) and reads the variable values transfer function from /// \p AllTheVlocs. Live-in and Live-out variable values are stored locally, /// with the live-ins permanently stored to \p Output once the fixedpoint is /// reached. @@ -824,8 +835,9 @@ /// RPOT block ordering. void initialSetup(MachineFunction &MF); - bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC, - unsigned InputBBLimit, unsigned InputDbgValLimit) override; + bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree, + TargetPassConfig *TPC, unsigned InputBBLimit, + unsigned InputDbgValLimit) override; public: /// Default construct and initialize the pass. diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp --- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -11,114 +11,48 @@ /// LiveDebugValues.cpp and VarLocBasedImpl.cpp for more information. /// /// This pass propagates variable locations between basic blocks, resolving -/// control flow conflicts between them. The problem is much like SSA -/// construction, where each DBG_VALUE instruction assigns the *value* that -/// a variable has, and every instruction where the variable is in scope uses -/// that variable. The resulting map of instruction-to-value is then translated -/// into a register (or spill) location for each variable over each instruction. +/// control flow conflicts between them. The problem is SSA construction, where +/// each debug instruction assigns the *value* that a variable has, and every +/// instruction where the variable is in scope uses that variable. The resulting +/// map of instruction-to-value is then translated into a register (or spill) +/// location for each variable over each instruction. /// -/// This pass determines which DBG_VALUE dominates which instructions, or if -/// none do, where values must be merged (like PHI nodes). The added -/// complication is that because codegen has already finished, a PHI node may -/// be needed for a variable location to be correct, but no register or spill -/// slot merges the necessary values. In these circumstances, the variable -/// location is dropped. +/// The primary difference from normal SSA construction is that we cannot +/// _create_ PHI values that contain variable values. CodeGen has already +/// completed, and we can't alter it just to make debug-info complete. Thus: +/// we can identify function positions where we would like a PHI value for a +/// variable, but must search the MachineFunction to see whether such a PHI is +/// available. If no such PHI exists, the variable location must be dropped. /// -/// What makes this analysis non-trivial is loops: we cannot tell in advance -/// whether a variable location is live throughout a loop, or whether its -/// location is clobbered (or redefined by another DBG_VALUE), without -/// exploring all the way through. -/// -/// To make this simpler we perform two kinds of analysis. First, we identify +/// To achieve this, we perform two kinds of analysis. First, we identify /// every value defined by every instruction (ignoring those that only move -/// another value), then compute a map of which values are available for each -/// instruction. This is stronger than a reaching-def analysis, as we create -/// PHI values where other values merge. -/// -/// Secondly, for each variable, we effectively re-construct SSA using each -/// DBG_VALUE as a def. The DBG_VALUEs read a value-number computed by the -/// first analysis from the location they refer to. We can then compute the -/// dominance frontiers of where a variable has a value, and create PHI nodes -/// where they merge. -/// This isn't precisely SSA-construction though, because the function shape -/// is pre-defined. If a variable location requires a PHI node, but no -/// PHI for the relevant values is present in the function (as computed by the -/// first analysis), the location must be dropped. -/// -/// Once both are complete, we can pass back over all instructions knowing: -/// * What _value_ each variable should contain, either defined by an -/// instruction or where control flow merges -/// * What the location of that value is (if any). -/// Allowing us to create appropriate live-in DBG_VALUEs, and DBG_VALUEs when -/// a value moves location. After this pass runs, all variable locations within -/// a block should be specified by DBG_VALUEs within that block, allowing -/// DbgEntityHistoryCalculator to focus on individual blocks. -/// -/// This pass is able to go fast because the size of the first -/// reaching-definition analysis is proportional to the working-set size of -/// the function, which the compiler tries to keep small. (It's also -/// proportional to the number of blocks). Additionally, we repeatedly perform -/// the second reaching-definition analysis with only the variables and blocks -/// in a single lexical scope, exploiting their locality. -/// -/// Determining where PHIs happen is trickier with this approach, and it comes -/// to a head in the major problem for LiveDebugValues: is a value live-through -/// a loop, or not? Your garden-variety dataflow analysis aims to build a set of -/// facts about a function, however this analysis needs to generate new value -/// numbers at joins. -/// -/// To do this, consider a lattice of all definition values, from instructions -/// and from PHIs. Each PHI is characterised by the RPO number of the block it -/// occurs in. Each value pair A, B can be ordered by RPO(A) < RPO(B): -/// with non-PHI values at the top, and any PHI value in the last block (by RPO -/// order) at the bottom. -/// -/// (Awkwardly: lower-down-the _lattice_ means a greater RPO _number_. Below, -/// "rank" always refers to the former). -/// -/// At any join, for each register, we consider: -/// * All incoming values, and -/// * The PREVIOUS live-in value at this join. -/// If all incoming values agree: that's the live-in value. If they do not, the -/// incoming values are ranked according to the partial order, and the NEXT -/// LOWEST rank after the PREVIOUS live-in value is picked (multiple values of -/// the same rank are ignored as conflicting). If there are no candidate values, -/// or if the rank of the live-in would be lower than the rank of the current -/// blocks PHIs, create a new PHI value. +/// another value), then re-compute an SSA-form representation of the +/// MachineFunction, using value propagation to eliminate any un-necessary +/// PHI values. This gives us a map of every value computed in the function, +/// and its location within the register file / stack. /// -/// Intuitively: if it's not immediately obvious what value a join should result -/// in, we iteratively descend from instruction-definitions down through PHI -/// values, getting closer to the current block each time. If the current block -/// is a loop head, this ordering is effectively searching outer levels of -/// loops, to find a value that's live-through the current loop. +/// Secondly, for each variable we perform the same analysis, where each debug +/// instruction is considered a def, and every instruction where the variable +/// is in lexical scope as a use. Value propagation is used again to eliminate +/// any un-necessary PHIs. This gives us a map of each variable to the value +/// it should have in a block. /// -/// If there is no value that's live-through this loop, a PHI is created for -/// this location instead. We can't use a lower-ranked PHI because by definition -/// it doesn't dominate the current block. We can't create a PHI value any -/// earlier, because we risk creating a PHI value at a location where values do -/// not in fact merge, thus misrepresenting the truth, and not making the true -/// live-through value for variable locations. +/// Once both are complete, we have two maps for each block: +/// * Variables to the values they should have, +/// * Values to the register / spill slot they are located in. +/// After which we can marry-up variable values with a location, and emit +/// DBG_VALUE instructions specifying those locations. Variable locations may +/// be dropped in this process due to the desired variable value not being +/// resident in any machine location, or because there is no PHI value in any +/// location that accurately represents the desired value. The building of +/// location lists for each block is left to DbgEntityHistoryCalculator. /// -/// This algorithm applies to both calculating the availability of values in -/// the first analysis, and the location of variables in the second. However -/// for the second we add an extra dimension of pain: creating a variable -/// location PHI is only valid if, for each incoming edge, -/// * There is a value for the variable on the incoming edge, and -/// * All the edges have that value in the same register. -/// Or put another way: we can only create a variable-location PHI if there is -/// a matching machine-location PHI, each input to which is the variables value -/// in the predecessor block. -/// -/// To accommodate this difference, each point on the lattice is split in -/// two: a "proposed" PHI and "definite" PHI. Any PHI that can immediately -/// have a location determined are "definite" PHIs, and no further work is -/// needed. Otherwise, a location that all non-backedge predecessors agree -/// on is picked and propagated as a "proposed" PHI value. If that PHI value -/// is truly live-through, it'll appear on the loop backedges on the next -/// dataflow iteration, after which the block live-in moves to be a "definite" -/// PHI. If it's not truly live-through, the variable value will be downgraded -/// further as we explore the lattice, or remains "proposed" and is considered -/// invalid once dataflow completes. +/// This pass is kept efficient because the size of the first SSA problem +/// is proportional to the working-set size of the function, which the compiler +/// tries to keep small. (It's also proportional to the number of blocks). +/// Additionally, we repeatedly perform the second SSA problem analysis with +/// only the variables and blocks in a single lexical scope, exploiting their +/// locality. /// /// ### Terminology /// @@ -128,15 +62,13 @@ /// contain the appropriate variable value. A value that is a PHI node is /// occasionally called an mphi. /// -/// The first dataflow problem is the "machine value location" problem, +/// The first SSA problem is the "machine value location" problem, /// because we're determining which machine locations contain which values. /// The "locations" are constant: what's unknown is what value they contain. /// -/// The second dataflow problem (the one for variables) is the "variable value +/// The second SSA problem (the one for variables) is the "variable value /// problem", because it's determining what values a variable has, rather than -/// what location those values are placed in. Unfortunately, it's not that -/// simple, because producing a PHI value always involves picking a location. -/// This is an imperfection that we just have to accept, at least for now. +/// what location those values are placed in. /// /// TODO: /// Overlapping fragments @@ -153,8 +85,10 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/IteratedDominanceFrontier.h" #include "llvm/CodeGen/LexicalScopes.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -1786,23 +1720,20 @@ } } -std::tuple -InstrRefBasedLDV::mlocJoin(MachineBasicBlock &MBB, - SmallPtrSet &Visited, - ValueIDNum **OutLocs, ValueIDNum *InLocs) { +bool InstrRefBasedLDV::mlocJoin( + MachineBasicBlock &MBB, SmallPtrSet &Visited, + ValueIDNum **OutLocs, ValueIDNum *InLocs) { LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n"); bool Changed = false; - bool DowngradeOccurred = false; - // Collect predecessors that have been visited. Anything that hasn't been - // visited yet is a backedge on the first iteration, and the meet of it's - // lattice value for all locations will be unaffected. + // Handle value-propagation when control flow merges on entry to a block. For + // any location without a PHI already placed, the location has the same value + // as its predecessors. If a PHI is placed, test to see whether it's now a + // redundant PHI that we can eliminate. + SmallVector BlockOrders; - for (auto Pred : MBB.predecessors()) { - if (Visited.count(Pred)) { - BlockOrders.push_back(Pred); - } - } + for (auto Pred : MBB.predecessors()) + BlockOrders.push_back(Pred); // Visit predecessors in RPOT order. auto Cmp = [&](const MachineBasicBlock *A, const MachineBasicBlock *B) { @@ -1812,83 +1743,60 @@ // Skip entry block. if (BlockOrders.size() == 0) - return std::tuple(false, false); + return false; - // Step through all machine locations, then look at each predecessor and - // detect disagreements. - unsigned ThisBlockRPO = BBToOrder.find(&MBB)->second; + // Step through all machine locations, look at each predecessor and test + // whether we can eliminate redundant PHIs. for (auto Location : MTracker->locations()) { LocIdx Idx = Location.Idx; + // Pick out the first predecessors live-out value for this location. It's - // guaranteed to be not a backedge, as we order by RPO. - ValueIDNum BaseVal = OutLocs[BlockOrders[0]->getNumber()][Idx.asU64()]; + // guaranteed to not be a backedge, as we order by RPO. + ValueIDNum FirstVal = OutLocs[BlockOrders[0]->getNumber()][Idx.asU64()]; + + // If we've already eliminated a PHI here, do no further checking, just + // propagate the first live-in value into this block. + if (InLocs[Idx.asU64()] != ValueIDNum(MBB.getNumber(), 0, Idx)) { + if (InLocs[Idx.asU64()] != FirstVal) { + InLocs[Idx.asU64()] = FirstVal; + Changed |= true; + } + continue; + } - // Some flags for whether there's a disagreement, and whether it's a - // disagreement with a backedge or not. + // We're now examining a PHI to see whether it's un-necessary. Loop around + // the other live-in values and test whether they're all the same. bool Disagree = false; - bool NonBackEdgeDisagree = false; - - // Loop around everything that wasn't 'base'. for (unsigned int I = 1; I < BlockOrders.size(); ++I) { - auto *MBB = BlockOrders[I]; - if (BaseVal != OutLocs[MBB->getNumber()][Idx.asU64()]) { - // Live-out of a predecessor disagrees with the first predecessor. - Disagree = true; + const MachineBasicBlock *PredMBB = BlockOrders[I]; + const ValueIDNum &PredLiveOut = + OutLocs[PredMBB->getNumber()][Idx.asU64()]; - // Test whether it's a disagreemnt in the backedges or not. - if (BBToOrder.find(MBB)->second < ThisBlockRPO) // might be self b/e - NonBackEdgeDisagree = true; - } - } + // Incoming values agree, continue trying to eliminate this PHI. + if (FirstVal == PredLiveOut) + continue; - bool OverRide = false; - if (Disagree && !NonBackEdgeDisagree) { - // Only the backedges disagree. Consider demoting the livein - // lattice value, as per the file level comment. The value we consider - // demoting to is the value that the non-backedge predecessors agree on. - // The order of values is that non-PHIs are \top, a PHI at this block - // \bot, and phis between the two are ordered by their RPO number. - // If there's no agreement, or we've already demoted to this PHI value - // before, replace with a PHI value at this block. - - // Calculate order numbers: zero means normal def, nonzero means RPO - // number. - unsigned BaseBlockRPONum = BBNumToRPO[BaseVal.getBlock()] + 1; - if (!BaseVal.isPHI()) - BaseBlockRPONum = 0; - - ValueIDNum &InLocID = InLocs[Idx.asU64()]; - unsigned InLocRPONum = BBNumToRPO[InLocID.getBlock()] + 1; - if (!InLocID.isPHI()) - InLocRPONum = 0; - - // Should we ignore the disagreeing backedges, and override with the - // value the other predecessors agree on (in "base")? - unsigned ThisBlockRPONum = BBNumToRPO[MBB.getNumber()] + 1; - if (BaseBlockRPONum > InLocRPONum && BaseBlockRPONum < ThisBlockRPONum) { - // Override. - OverRide = true; - DowngradeOccurred = true; - } + // We can also accept a PHI value that feeds back into itself. + if (PredLiveOut == ValueIDNum(MBB.getNumber(), 0, Idx)) + continue; + + // Live-out of a predecessor disagrees with the first predecessor. + Disagree = true; } - // else: if we disagree in the non-backedges, then this is definitely - // a control flow merge where different values merge. Make it a PHI. - // Generate a phi... - ValueIDNum PHI = {(uint64_t)MBB.getNumber(), 0, Idx}; - ValueIDNum NewVal = (Disagree && !OverRide) ? PHI : BaseVal; - if (InLocs[Idx.asU64()] != NewVal) { + // No disagreement? No PHI. Otherwise, leave the PHI in live-ins. + if (!Disagree) { + InLocs[Idx.asU64()] = FirstVal; Changed |= true; - InLocs[Idx.asU64()] = NewVal; } } // TODO: Reimplement NumInserted and NumRemoved. - return std::tuple(Changed, DowngradeOccurred); + return Changed; } -void InstrRefBasedLDV::mlocDataflow( - ValueIDNum **MInLocs, ValueIDNum **MOutLocs, +void InstrRefBasedLDV::buildMLocValueMap( + MachineFunction &MF, ValueIDNum **MInLocs, ValueIDNum **MOutLocs, SmallVectorImpl &MLocTransfer) { std::priority_queue, std::greater> @@ -1899,20 +1807,59 @@ // but this is probably not worth it. SmallPtrSet OnPending, OnWorklist; - // Initialize worklist with every block to be visited. + // Initialize worklist with every block to be visited. Also produce list of + // all blocks. + SmallPtrSet AllBlocks; for (unsigned int I = 0; I < BBToOrder.size(); ++I) { Worklist.push(I); OnWorklist.insert(OrderToBB[I]); + AllBlocks.insert(OrderToBB[I]); } + // Initialize entry block to PHIs. These represent arguments. + for (auto Location : MTracker->locations()) + MInLocs[0][Location.Idx.asU64()] = ValueIDNum(0, 0, Location.Idx); + MTracker->reset(); - // Set inlocs for entry block -- each as a PHI at the entry block. Represents - // the incoming value to the function. - MTracker->setMPhis(0); - for (auto Location : MTracker->locations()) - MInLocs[0][Location.Idx.asU64()] = Location.Value; + // Start by placing PHIs, using the usual SSA constructor algorithm. Consider + // any machine-location that isn't live-through a block to be def'd in that + // block. + for (auto Location : MTracker->locations()) { + // Collect the set of defs. + SmallPtrSet DefBlocks; + for (unsigned int I = 0; I < OrderToBB.size(); ++I) { + MachineBasicBlock *MBB = OrderToBB[I]; + const auto &TransferFunc = MLocTransfer[MBB->getNumber()]; + if (TransferFunc.find(Location.Idx) != TransferFunc.end()) + DefBlocks.insert(MBB); + } + + // The entry block defs the location too: it's the live-in / argument value. + // Only insert if there are other defs though; everything is trivially live + // through otherwise. + if (!DefBlocks.empty()) + DefBlocks.insert(&*MF.begin()); + // Ask the SSA construction algorithm where we should put PHIs. + SmallVector PHIBlocks; + BlockPHIPlacement(AllBlocks, DefBlocks, PHIBlocks); + + // Install those PHI values into the live-in value array. + for (const MachineBasicBlock *MBB : PHIBlocks) { + MInLocs[MBB->getNumber()][Location.Idx.asU64()] = + ValueIDNum(MBB->getNumber(), 0, Location.Idx); + } + } + + // Propagate values to eliminate redundant PHIs. At the same time, this + // produces the table of Block x Location => Value for the entry to each + // block. + // The kind of PHIs we can eliminate are, for example, where one path in a + // conditional spills and restores a register, and the register still has + // the same value once control flow joins, unbeknowns to the PHI placement + // code. Propagating values allows us to identify such un-necessary PHIs and + // remove them. SmallPtrSet Visited; while (!Worklist.empty() || !Pending.empty()) { // Vector for storing the evaluated block transfer function. @@ -1924,16 +1871,10 @@ Worklist.pop(); // Join the values in all predecessor blocks. - bool InLocsChanged, DowngradeOccurred; - std::tie(InLocsChanged, DowngradeOccurred) = - mlocJoin(*MBB, Visited, MOutLocs, MInLocs[CurBB]); + bool InLocsChanged; + InLocsChanged = mlocJoin(*MBB, Visited, MOutLocs, MInLocs[CurBB]); InLocsChanged |= Visited.insert(MBB).second; - // If a downgrade occurred, book us in for re-examination on the next - // iteration. - if (DowngradeOccurred && OnPending.insert(MBB).second) - Pending.push(BBToOrder[MBB]); - // Don't examine transfer function if we've visited this loc at least // once, and inlocs haven't changed. if (!InLocsChanged) @@ -1978,8 +1919,8 @@ continue; // All successors should be visited: put any back-edges on the pending - // list for the next dataflow iteration, and any other successors to be - // visited this iteration, if they're not going to be already. + // list for the next pass-through, and any other successors to be + // visited this pass, if they're not going to be already. for (auto s : MBB->successors()) { // Does branching to this successor represent a back-edge? if (BBToOrder[s] > BBToOrder[MBB]) { @@ -2002,8 +1943,55 @@ assert(Pending.empty() && "Pending should be empty"); } - // Once all the live-ins don't change on mlocJoin(), we've reached a - // fixedpoint. + // Once all the live-ins don't change on mlocJoin(), we've eliminated all + // redundant PHIs. +} + +// Boilerplate for feeding MachineBasicBlocks into IDF calculator. Provide +// template specialisations for graph traits and a successor enumerator. +namespace llvm { +template <> struct GraphTraits { + using NodeRef = MachineBasicBlock *; + using ChildIteratorType = MachineBasicBlock::succ_iterator; + + static NodeRef getEntryNode(MachineBasicBlock *BB) { return BB; } + static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); } + static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); } +}; + +template <> struct GraphTraits { + using NodeRef = const MachineBasicBlock *; + using ChildIteratorType = MachineBasicBlock::const_succ_iterator; + + static NodeRef getEntryNode(const MachineBasicBlock *BB) { return BB; } + static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); } + static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); } +}; + +using MachineDomTreeBase = DomTreeBase::NodeType; +using MachineDomTreeChildGetter = + typename IDFCalculatorDetail::ChildrenGetterTy; + +template <> +typename MachineDomTreeChildGetter::ChildrenTy +MachineDomTreeChildGetter::get(const NodeRef &N) { + return {N->succ_begin(), N->succ_end()}; +} +} // namespace llvm + +void InstrRefBasedLDV::BlockPHIPlacement( + const SmallPtrSetImpl &AllBlocks, + const SmallPtrSetImpl &DefBlocks, + SmallVectorImpl &PHIBlocks) { + // Apply IDF calculator to the designated set of location defs, storing + // required PHIs into PHIBlocks. Uses the dominator tree stored in the + // InstrRefBasedLDV object. + IDFCalculatorDetail::ChildrenGetterTy foo; + IDFCalculatorBase IDF(DomTree->getBase(), foo); + + IDF.setLiveInBlocks(AllBlocks); + IDF.setDefiningBlocks(DefBlocks); + IDF.calculate(PHIBlocks); } bool InstrRefBasedLDV::vlocDowngradeLattice( @@ -2756,7 +2744,9 @@ /// Calculate the liveness information for the given machine function and /// extend ranges across basic blocks. -bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC, +bool InstrRefBasedLDV::ExtendRanges(MachineFunction &MF, + MachineDominatorTree *DomTree, + TargetPassConfig *TPC, unsigned InputBBLimit, unsigned InputDbgValLimit) { // No subprogram means this function contains no debuginfo. @@ -2766,6 +2756,7 @@ LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n"); this->TPC = TPC; + this->DomTree = DomTree; TRI = MF.getSubtarget().getRegisterInfo(); TII = MF.getSubtarget().getInstrInfo(); TFI = MF.getSubtarget().getFrameLowering(); @@ -2803,6 +2794,7 @@ ValueIDNum **MInLocs = new ValueIDNum *[MaxNumBlocks]; unsigned NumLocs = MTracker->getNumLocs(); for (int i = 0; i < MaxNumBlocks; ++i) { + // These all auto-initialize to ValueIDNum::EmptyValue MOutLocs[i] = new ValueIDNum[NumLocs]; MInLocs[i] = new ValueIDNum[NumLocs]; } @@ -2811,7 +2803,7 @@ // storing the computed live-ins / live-outs into the array-of-arrays. We use // both live-ins and live-outs for decision making in the variable value // dataflow problem. - mlocDataflow(MInLocs, MOutLocs, MLocTransfer); + buildMLocValueMap(MF, MInLocs, MOutLocs, MLocTransfer); // Patch up debug phi numbers, turning unknown block-live-in values into // either live-through machine values, or PHIs. diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h --- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h +++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h @@ -9,6 +9,7 @@ #ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H #define LLVM_LIB_CODEGEN_LIVEDEBUGVALUES_LIVEDEBUGVALUES_H +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -23,8 +24,8 @@ // implementation. class LDVImpl { public: - virtual bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC, - unsigned InputBBLimit, + virtual bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree, + TargetPassConfig *TPC, unsigned InputBBLimit, unsigned InputDbgValLimit) = 0; virtual ~LDVImpl() {} }; diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp --- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp @@ -81,6 +81,7 @@ private: LDVImpl *TheImpl; TargetPassConfig *TPC; + MachineDominatorTree MDT; }; char LiveDebugValues::ID = 0; @@ -97,12 +98,12 @@ } bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { + bool InstrRefBased = MF.useDebugInstrRef(); + // Allow the user to force selection of InstrRef LDV. + InstrRefBased |= ForceInstrRefLDV; + if (!TheImpl) { TPC = getAnalysisIfAvailable(); - bool InstrRefBased = MF.useDebugInstrRef(); - - // Allow the user to force selection of InstrRef LDV. - InstrRefBased |= ForceInstrRefLDV; if (InstrRefBased) TheImpl = llvm::makeInstrRefBasedLiveDebugValues(); @@ -110,5 +111,12 @@ TheImpl = llvm::makeVarLocBasedLiveDebugValues(); } - return TheImpl->ExtendRanges(MF, TPC, InputBBLimit, InputDbgValueLimit); + MachineDominatorTree *DomTree = nullptr; + if (InstrRefBased) { + DomTree = &MDT; + MDT.calculate(MF); + } + + return TheImpl->ExtendRanges(MF, DomTree, TPC, InputBBLimit, + InputDbgValueLimit); } diff --git a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp --- a/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp @@ -1014,8 +1014,9 @@ /// had their instruction creation deferred. void flushPendingLocs(VarLocInMBB &PendingInLocs, VarLocMap &VarLocIDs); - bool ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC, - unsigned InputBBLimit, unsigned InputDbgValLimit) override; + bool ExtendRanges(MachineFunction &MF, MachineDominatorTree *DomTree, + TargetPassConfig *TPC, unsigned InputBBLimit, + unsigned InputDbgValLimit) override; public: /// Default construct and initialize the pass. @@ -2107,9 +2108,11 @@ /// Calculate the liveness information for the given machine function and /// extend ranges across basic blocks. -bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, TargetPassConfig *TPC, - unsigned InputBBLimit, +bool VarLocBasedLDV::ExtendRanges(MachineFunction &MF, + MachineDominatorTree *DomTree, + TargetPassConfig *TPC, unsigned InputBBLimit, unsigned InputDbgValLimit) { + (void)DomTree; LLVM_DEBUG(dbgs() << "\nDebug Range Extension\n"); if (!MF.getFunction().getSubprogram()) diff --git a/llvm/unittests/CodeGen/InstrRefLDVTest.cpp b/llvm/unittests/CodeGen/InstrRefLDVTest.cpp --- a/llvm/unittests/CodeGen/InstrRefLDVTest.cpp +++ b/llvm/unittests/CodeGen/InstrRefLDVTest.cpp @@ -29,9 +29,14 @@ class InstrRefLDVTest : public testing::Test { public: + friend class InstrRefBasedLDV; + using MLocTransferMap = InstrRefBasedLDV::MLocTransferMap; + LLVMContext Ctx; Module Mod; + std::unique_ptr Machine; std::unique_ptr MF; + std::unique_ptr DomTree; DICompileUnit *OurCU; DIFile *OurFile; DISubprogram *OurFunc; @@ -41,33 +46,36 @@ DebugLoc OutermostLoc, InBlockLoc, NotNestedBlockLoc, InlinedLoc; - MachineBasicBlock *MBB1, *MBB2, *MBB3, *MBB4; + MachineBasicBlock *MBB1, *MBB2, *MBB3, *MBB4, *MBB5; + + std::unique_ptr LDV; + std::unique_ptr MTracker; InstrRefLDVTest() : Ctx(), Mod("beehives", Ctx) { + } + + void SetUp() { // Boilerplate that creates a MachineFunction and associated blocks. - MF = createMachineFunction(Ctx, Mod); - llvm::Function &F = const_cast(MF->getFunction()); - auto BB1 = BasicBlock::Create(Ctx, "a", &F); - auto BB2 = BasicBlock::Create(Ctx, "b", &F); - auto BB3 = BasicBlock::Create(Ctx, "c", &F); - auto BB4 = BasicBlock::Create(Ctx, "d", &F); - IRBuilder<> IRB1(BB1), IRB2(BB2), IRB3(BB3), IRB4(BB4); - IRB1.CreateBr(BB2); - IRB2.CreateBr(BB3); - IRB3.CreateBr(BB4); - IRB4.CreateRetVoid(); - MBB1 = MF->CreateMachineBasicBlock(BB1); - MF->insert(MF->end(), MBB1); - MBB2 = MF->CreateMachineBasicBlock(BB2); - MF->insert(MF->end(), MBB2); - MBB3 = MF->CreateMachineBasicBlock(BB3); - MF->insert(MF->end(), MBB3); - MBB4 = MF->CreateMachineBasicBlock(BB4); - MF->insert(MF->end(), MBB4); - MBB1->addSuccessor(MBB2); - MBB1->addSuccessor(MBB3); - MBB2->addSuccessor(MBB4); - MBB3->addSuccessor(MBB4); + + Mod.setDataLayout("e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128"); + Triple TargetTriple("x86_64--"); + std::string Error; + const Target *T = TargetRegistry::lookupTarget("", TargetTriple, Error); + if (!T) + GTEST_SKIP(); + + TargetOptions Options; + Machine = std::unique_ptr(T->createTargetMachine( + "X86", "", "", Options, None, None, CodeGenOpt::Aggressive)); + + auto Type = FunctionType::get(Type::getVoidTy(Ctx), false); + auto F = Function::Create(Type, GlobalValue::ExternalLinkage, "Test", &Mod); + + unsigned FunctionNum = 42; + MachineModuleInfo MMI((LLVMTargetMachine*)&*Machine); + const TargetSubtargetInfo &STI = *Machine->getSubtargetImpl(*F); + + MF = std::make_unique(*F, (LLVMTargetMachine&)*Machine, STI, FunctionNum, MMI); // Create metadata: CU, subprogram, some blocks and an inline function // scope. @@ -79,7 +87,7 @@ OurFunc = DIB.createFunction(OurCU, "bees", "", OurFile, 1, OurSubT, 1, DINode::FlagZero, DISubprogram::SPFlagDefinition); - F.setSubprogram(OurFunc); + F->setSubprogram(OurFunc); OurBlock = DIB.createLexicalBlock(OurFunc, OurFile, 2, 3); AnotherBlock = DIB.createLexicalBlock(OurFunc, OurFile, 2, 6); ToInlineFunc = @@ -96,4 +104,994 @@ DIB.finalize(); } + + Register getRegByName(const char *WantedName) { + auto *TRI = MF->getRegInfo().getTargetRegisterInfo(); + // Slow, but works. + for (unsigned int I = 1; I < TRI->getNumRegs(); ++I) { + const char *Name = TRI->getName(I); + if (strcmp(WantedName, Name) == 0) + return I; + } + + // If this ever fails, something is very wrong with this unit test. + llvm_unreachable("Can't find register by name"); + } + + InstrRefBasedLDV *setupLDVObj() { + // Create a new LDV object, and plug some relevant object ptrs into it. + LDV = std::make_unique(); + const TargetSubtargetInfo &STI = MF->getSubtarget(); + LDV->TII = STI.getInstrInfo(); + LDV->TRI = STI.getRegisterInfo(); + LDV->TFI = STI.getFrameLowering(); + LDV->MFI = &MF->getFrameInfo(); + + DomTree = std::make_unique(*MF); + LDV->DomTree = &*DomTree; + + // Future work: unit tests for mtracker / vtracker / ttracker. + + // Setup things like the artifical block map, and BlockNo <=> RPO Order + // mappings. + LDV->initialSetup(*MF); + addMTracker(); + return &*LDV; + } + + void addMTracker() { + ASSERT_TRUE(LDV); + // Add a machine-location-tracking object to LDV. Don't initialize any + // register locations within it though. + const TargetSubtargetInfo &STI = MF->getSubtarget(); + MTracker = std::make_unique( + *MF, *LDV->TII, *LDV->TRI, *STI.getTargetLowering()); + LDV->MTracker = &*MTracker; + } + + // Some routines for bouncing into LDV, + void buildMLocValueMap(ValueIDNum **MInLocs, ValueIDNum **MOutLocs, + SmallVectorImpl &MLocTransfer) { + LDV->buildMLocValueMap(*MF, MInLocs, MOutLocs, MLocTransfer); + } + + void initValueArray(ValueIDNum **Nums, unsigned Blks, unsigned Locs) { + for (unsigned int I = 0; I < Blks; ++I) + for (unsigned int J = 0; J < Locs; ++J) + Nums[I][J] = ValueIDNum::EmptyValue; + } }; + +TEST_F(InstrRefLDVTest, MLocSingleBlock) { + // Test some very simple properties about interpreting the transfer function. + + // Add an entry block with nothing but 'ret void' in it. + Function &F = const_cast(MF->getFunction()); + auto *BB1 = BasicBlock::Create(Ctx, "entry", &F); + IRBuilder<> IRB(BB1); + IRB.CreateRetVoid(); + MBB1 = MF->CreateMachineBasicBlock(BB1); + MF->insert(MF->end(), MBB1); + MF->RenumberBlocks(); + + setupLDVObj(); + // We should start with a single location, the stack pointer. + ASSERT_TRUE(MTracker->getNumLocs() == 1); + LocIdx RspLoc(0); + + // Set up live-in and live-out tables for this function: two locations (we + // add one later) in a single block. + ValueIDNum InLocs[2], OutLocs[2]; + ValueIDNum *InLocsPtr[1] = {&InLocs[0]}; + ValueIDNum *OutLocsPtr[1] = {&OutLocs[0]}; + + // Transfer function: nothing. + SmallVector TransferFunc = {{}}; + + // Try and build value maps... + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + + // The result should be that RSP is marked as a live-in-PHI -- this represents + // an argument. And as there's no transfer function, the block live-out should + // be the same. + EXPECT_EQ(InLocs[0], ValueIDNum(0, 0, RspLoc)); + EXPECT_EQ(OutLocs[0], ValueIDNum(0, 0, RspLoc)); + + // Try again, this time initialising the in-locs to be defined by an + // instruction. The entry block should always be re-assigned to be the + // arguments. + initValueArray(InLocsPtr, 1, 2); + initValueArray(OutLocsPtr, 1, 2); + InLocs[0] = ValueIDNum(0, 1, RspLoc); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0], ValueIDNum(0, 0, RspLoc)); + EXPECT_EQ(OutLocs[0], ValueIDNum(0, 0, RspLoc)); + + // Now insert something into the transfer function to assign to the single + // machine location. + TransferFunc[0].insert({RspLoc, ValueIDNum(0, 1, RspLoc)}); + initValueArray(InLocsPtr, 1, 2); + initValueArray(OutLocsPtr, 1, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0], ValueIDNum(0, 0, RspLoc)); + EXPECT_EQ(OutLocs[0], ValueIDNum(0, 1, RspLoc)); + TransferFunc[0].clear(); + + // Add a new register to be tracked, and insert it into the transfer function + // as a copy. The output of $rax should be the live-in value of $rsp. + Register RAX = getRegByName("RAX"); + LocIdx RaxLoc = MTracker->lookupOrTrackRegister(RAX); + TransferFunc[0].insert({RspLoc, ValueIDNum(0, 1, RspLoc)}); + TransferFunc[0].insert({RaxLoc, ValueIDNum(0, 0, RspLoc)}); + initValueArray(InLocsPtr, 1, 2); + initValueArray(OutLocsPtr, 1, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0], ValueIDNum(0, 0, RspLoc)); + EXPECT_EQ(InLocs[1], ValueIDNum(0, 0, RaxLoc)); + EXPECT_EQ(OutLocs[0], ValueIDNum(0, 1, RspLoc)); + EXPECT_EQ(OutLocs[1], ValueIDNum(0, 0, RspLoc)); // Rax contains RspLoc. + TransferFunc[0].clear(); +} + +TEST_F(InstrRefLDVTest, MLocDiamondBlocks) { + // Test that information flows from the entry block to two successors. + + // entry + // / \ + // br1 br2 + // \ / + // ret + llvm::Function &F = const_cast(MF->getFunction()); + auto *BB1 = BasicBlock::Create(Ctx, "a", &F); + auto *BB2 = BasicBlock::Create(Ctx, "b", &F); + auto *BB3 = BasicBlock::Create(Ctx, "c", &F); + auto *BB4 = BasicBlock::Create(Ctx, "d", &F); + IRBuilder<> IRB1(BB1), IRB2(BB2), IRB3(BB3), IRB4(BB4); + IRB1.CreateBr(BB2); + IRB2.CreateBr(BB3); + IRB3.CreateBr(BB4); + IRB4.CreateRetVoid(); + MBB1 = MF->CreateMachineBasicBlock(BB1); + MF->insert(MF->end(), MBB1); + MBB2 = MF->CreateMachineBasicBlock(BB2); + MF->insert(MF->end(), MBB2); + MBB3 = MF->CreateMachineBasicBlock(BB3); + MF->insert(MF->end(), MBB3); + MBB4 = MF->CreateMachineBasicBlock(BB4); + MF->insert(MF->end(), MBB4); + MBB1->addSuccessor(MBB2); + MBB1->addSuccessor(MBB3); + MBB2->addSuccessor(MBB4); + MBB3->addSuccessor(MBB4); + MF->RenumberBlocks(); + + setupLDVObj(); + + ASSERT_TRUE(MTracker->getNumLocs() == 1); + LocIdx RspLoc(0); + Register RAX = getRegByName("RAX"); + LocIdx RaxLoc = MTracker->lookupOrTrackRegister(RAX); + + ValueIDNum InLocs[4][2], OutLocs[4][2]; + ValueIDNum *InLocsPtr[4] = {InLocs[0], InLocs[1], InLocs[2], InLocs[3]}; + ValueIDNum *OutLocsPtr[4] = {OutLocs[0], OutLocs[1], OutLocs[2], OutLocs[3]}; + + // Transfer function: start with nothing. + SmallVector TransferFunc; + TransferFunc.resize(4); + + // Name some values. + ValueIDNum LiveInRsp(0, 0, RspLoc); + ValueIDNum RspDefInBlk0(0, 1, RspLoc); + ValueIDNum RspDefInBlk1(1, 1, RspLoc); + ValueIDNum RspDefInBlk2(2, 1, RspLoc); + ValueIDNum RspPHIInBlk3(3, 0, RspLoc); + ValueIDNum RaxLiveInBlk1(1, 0, RaxLoc); + ValueIDNum RaxLiveInBlk2(2, 0, RaxLoc); + + // With no transfer function, the live-in values to the entry block should + // propagate to all live-outs and the live-ins to the two successor blocks. + // IN ADDITION: this checks that the exit block doesn't get a PHI put in it. + initValueArray(InLocsPtr, 4, 2); + initValueArray(OutLocsPtr, 4, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], LiveInRsp); + EXPECT_EQ(InLocs[2][0], LiveInRsp); + EXPECT_EQ(InLocs[3][0], LiveInRsp); + EXPECT_EQ(OutLocs[0][0], LiveInRsp); + EXPECT_EQ(OutLocs[1][0], LiveInRsp); + EXPECT_EQ(OutLocs[2][0], LiveInRsp); + EXPECT_EQ(OutLocs[3][0], LiveInRsp); + // (Skipped writing out locations for $rax). + + // Check that a def of $rsp in the entry block will likewise reach all the + // successors. + TransferFunc[0].insert({RspLoc, RspDefInBlk0}); + initValueArray(InLocsPtr, 4, 2); + initValueArray(OutLocsPtr, 4, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], RspDefInBlk0); + EXPECT_EQ(InLocs[2][0], RspDefInBlk0); + EXPECT_EQ(InLocs[3][0], RspDefInBlk0); + EXPECT_EQ(OutLocs[0][0], RspDefInBlk0); + EXPECT_EQ(OutLocs[1][0], RspDefInBlk0); + EXPECT_EQ(OutLocs[2][0], RspDefInBlk0); + EXPECT_EQ(OutLocs[3][0], RspDefInBlk0); + TransferFunc[0].clear(); + + // Def in one branch of the diamond means that we need a PHI in the ret block + TransferFunc[0].insert({RspLoc, RspDefInBlk0}); + TransferFunc[1].insert({RspLoc, RspDefInBlk1}); + initValueArray(InLocsPtr, 4, 2); + initValueArray(OutLocsPtr, 4, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + // This value map: like above, where RspDefInBlk0 is propagated through one + // branch of the diamond, but is def'ed in the live-outs of the other. The + // ret / merging block should have a PHI in its live-ins. + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], RspDefInBlk0); + EXPECT_EQ(InLocs[2][0], RspDefInBlk0); + EXPECT_EQ(InLocs[3][0], RspPHIInBlk3); + EXPECT_EQ(OutLocs[0][0], RspDefInBlk0); + EXPECT_EQ(OutLocs[1][0], RspDefInBlk1); + EXPECT_EQ(OutLocs[2][0], RspDefInBlk0); + EXPECT_EQ(OutLocs[3][0], RspPHIInBlk3); + TransferFunc[0].clear(); + TransferFunc[1].clear(); + + // If we have differeing defs in either side of the diamond, we should + // continue to produce a PHI, + TransferFunc[0].insert({RspLoc, RspDefInBlk0}); + TransferFunc[1].insert({RspLoc, RspDefInBlk1}); + TransferFunc[2].insert({RspLoc, RspDefInBlk2}); + initValueArray(InLocsPtr, 4, 2); + initValueArray(OutLocsPtr, 4, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], RspDefInBlk0); + EXPECT_EQ(InLocs[2][0], RspDefInBlk0); + EXPECT_EQ(InLocs[3][0], RspPHIInBlk3); + EXPECT_EQ(OutLocs[0][0], RspDefInBlk0); + EXPECT_EQ(OutLocs[1][0], RspDefInBlk1); + EXPECT_EQ(OutLocs[2][0], RspDefInBlk2); + EXPECT_EQ(OutLocs[3][0], RspPHIInBlk3); + TransferFunc[0].clear(); + TransferFunc[1].clear(); + TransferFunc[2].clear(); + + // If we have defs of the same value on either side of the branch, a PHI will + // initially be created, however value propagation should then eliminate it. + // Encode this by copying the live-in value to $rax, and copying it to $rsp + // from $rax in each branch of the diamond. We don't allow the definition of + // arbitary values in transfer functions. + TransferFunc[0].insert({RspLoc, RspDefInBlk0}); + TransferFunc[0].insert({RaxLoc, LiveInRsp}); + TransferFunc[1].insert({RspLoc, RaxLiveInBlk1}); + TransferFunc[2].insert({RspLoc, RaxLiveInBlk2}); + initValueArray(InLocsPtr, 4, 2); + initValueArray(OutLocsPtr, 4, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], RspDefInBlk0); + EXPECT_EQ(InLocs[2][0], RspDefInBlk0); + EXPECT_EQ(InLocs[3][0], LiveInRsp); + EXPECT_EQ(OutLocs[0][0], RspDefInBlk0); + EXPECT_EQ(OutLocs[1][0], LiveInRsp); + EXPECT_EQ(OutLocs[2][0], LiveInRsp); + EXPECT_EQ(OutLocs[3][0], LiveInRsp); + TransferFunc[0].clear(); + TransferFunc[1].clear(); + TransferFunc[2].clear(); +} + +TEST_F(InstrRefLDVTest, MLocSimpleLoop) { + // entry + // | + // |/-----\ + // loopblk | + // |\-----/ + // | + // ret + llvm::Function &F = const_cast(MF->getFunction()); + auto *BB1 = BasicBlock::Create(Ctx, "entry", &F); + auto *BB2 = BasicBlock::Create(Ctx, "loop", &F); + auto *BB3 = BasicBlock::Create(Ctx, "ret", &F); + IRBuilder<> IRB1(BB1), IRB2(BB2), IRB3(BB3); + IRB1.CreateBr(BB2); + IRB2.CreateBr(BB3); + IRB3.CreateRetVoid(); + MBB1 = MF->CreateMachineBasicBlock(BB1); + MF->insert(MF->end(), MBB1); + MBB2 = MF->CreateMachineBasicBlock(BB2); + MF->insert(MF->end(), MBB2); + MBB3 = MF->CreateMachineBasicBlock(BB3); + MF->insert(MF->end(), MBB3); + MBB1->addSuccessor(MBB2); + MBB2->addSuccessor(MBB3); + MBB2->addSuccessor(MBB2); + MF->RenumberBlocks(); + + setupLDVObj(); + + ASSERT_TRUE(MTracker->getNumLocs() == 1); + LocIdx RspLoc(0); + Register RAX = getRegByName("RAX"); + LocIdx RaxLoc = MTracker->lookupOrTrackRegister(RAX); + + ValueIDNum InLocs[3][2], OutLocs[3][2]; + ValueIDNum *InLocsPtr[3] = {InLocs[0], InLocs[1], InLocs[2]}; + ValueIDNum *OutLocsPtr[3] = {OutLocs[0], OutLocs[1], OutLocs[2]}; + + SmallVector TransferFunc; + TransferFunc.resize(3); + + // Name some values. + ValueIDNum LiveInRsp(0, 0, RspLoc); + ValueIDNum RspPHIInBlk1(1, 0, RspLoc); + ValueIDNum RspDefInBlk1(1, 1, RspLoc); + ValueIDNum LiveInRax(0, 0, RaxLoc); + ValueIDNum RaxPHIInBlk1(1, 0, RaxLoc); + ValueIDNum RaxPHIInBlk2(2, 0, RaxLoc); + + // Begin test with all locations being live-through. + initValueArray(InLocsPtr, 3, 2); + initValueArray(OutLocsPtr, 3, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], LiveInRsp); + EXPECT_EQ(InLocs[2][0], LiveInRsp); + EXPECT_EQ(OutLocs[0][0], LiveInRsp); + EXPECT_EQ(OutLocs[1][0], LiveInRsp); + EXPECT_EQ(OutLocs[2][0], LiveInRsp); + + // Add a def of $rsp to the loop block: it should be in the live-outs, but + // should cause a PHI to be placed in the live-ins. Test the transfer function + // by copying that PHI into $rax in the loop, then back to $rsp in the ret + // block. + TransferFunc[1].insert({RspLoc, RspDefInBlk1}); + TransferFunc[1].insert({RaxLoc, RspPHIInBlk1}); + TransferFunc[2].insert({RspLoc, RaxPHIInBlk2}); + initValueArray(InLocsPtr, 3, 2); + initValueArray(OutLocsPtr, 3, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], RspPHIInBlk1); + EXPECT_EQ(InLocs[2][0], RspDefInBlk1); + EXPECT_EQ(OutLocs[0][0], LiveInRsp); + EXPECT_EQ(OutLocs[1][0], RspDefInBlk1); + EXPECT_EQ(OutLocs[2][0], RspPHIInBlk1); + // Check rax as well, + EXPECT_EQ(InLocs[0][1], LiveInRax); + EXPECT_EQ(InLocs[1][1], RaxPHIInBlk1); + EXPECT_EQ(InLocs[2][1], RspPHIInBlk1); + EXPECT_EQ(OutLocs[0][1], LiveInRax); + EXPECT_EQ(OutLocs[1][1], RspPHIInBlk1); + EXPECT_EQ(OutLocs[2][1], RspPHIInBlk1); + TransferFunc[1].clear(); + TransferFunc[2].clear(); + + // As with the diamond case, a PHI will be created if there's a (implicit) + // def in the entry block and loop block; but should be value propagated away + // if it copies in the same value. Copy live-in $rsp to $rax, then copy it + // into $rsp in the loop. Encoded as copying the live-in $rax value in block 1 + // to $rsp. + TransferFunc[0].insert({RaxLoc, LiveInRsp}); + TransferFunc[1].insert({RspLoc, RaxPHIInBlk1}); + initValueArray(InLocsPtr, 3, 2); + initValueArray(OutLocsPtr, 3, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], LiveInRsp); + EXPECT_EQ(InLocs[2][0], LiveInRsp); + EXPECT_EQ(OutLocs[0][0], LiveInRsp); + EXPECT_EQ(OutLocs[1][0], LiveInRsp); + EXPECT_EQ(OutLocs[2][0], LiveInRsp); + // Check $rax's values. + EXPECT_EQ(InLocs[0][1], LiveInRax); + EXPECT_EQ(InLocs[1][1], LiveInRsp); + EXPECT_EQ(InLocs[2][1], LiveInRsp); + EXPECT_EQ(OutLocs[0][1], LiveInRsp); + EXPECT_EQ(OutLocs[1][1], LiveInRsp); + EXPECT_EQ(OutLocs[2][1], LiveInRsp); + TransferFunc[0].clear(); + TransferFunc[1].clear(); +} + +TEST_F(InstrRefLDVTest, MLocNestedLoop) { + // entry + // | + // loop1 + // ^\ + // | \ /-\ + // | loop2 | + // | / \-/ + // ^ / + // join + // | + // ret + llvm::Function &F = const_cast(MF->getFunction()); + auto *BB1 = BasicBlock::Create(Ctx, "entry", &F); + auto *BB2 = BasicBlock::Create(Ctx, "loop1", &F); + auto *BB3 = BasicBlock::Create(Ctx, "loop2", &F); + auto *BB4 = BasicBlock::Create(Ctx, "join", &F); + auto *BB5 = BasicBlock::Create(Ctx, "ret", &F); + IRBuilder<> IRB1(BB1), IRB2(BB2), IRB3(BB3), IRB4(BB4), IRB5(BB5); + IRB1.CreateBr(BB2); + IRB2.CreateBr(BB3); + IRB3.CreateBr(BB4); + IRB4.CreateBr(BB5); + IRB5.CreateRetVoid(); + MBB1 = MF->CreateMachineBasicBlock(BB1); + MF->insert(MF->end(), MBB1); + MBB2 = MF->CreateMachineBasicBlock(BB2); + MF->insert(MF->end(), MBB2); + MBB3 = MF->CreateMachineBasicBlock(BB3); + MF->insert(MF->end(), MBB3); + MBB4 = MF->CreateMachineBasicBlock(BB4); + MF->insert(MF->end(), MBB4); + MBB5 = MF->CreateMachineBasicBlock(BB5); + MF->insert(MF->end(), MBB5); + MBB1->addSuccessor(MBB2); + MBB2->addSuccessor(MBB3); + MBB3->addSuccessor(MBB3); + MBB3->addSuccessor(MBB4); + MBB4->addSuccessor(MBB2); + MBB4->addSuccessor(MBB5); + MF->RenumberBlocks(); + + setupLDVObj(); + + ASSERT_TRUE(MTracker->getNumLocs() == 1); + LocIdx RspLoc(0); + Register RAX = getRegByName("RAX"); + LocIdx RaxLoc = MTracker->lookupOrTrackRegister(RAX); + + ValueIDNum InLocs[5][2], OutLocs[5][2]; + ValueIDNum *InLocsPtr[5] = {InLocs[0], InLocs[1], InLocs[2], InLocs[3], + InLocs[4]}; + ValueIDNum *OutLocsPtr[5] = {OutLocs[0], OutLocs[1], OutLocs[2], OutLocs[3], + OutLocs[4]}; + + SmallVector TransferFunc; + TransferFunc.resize(5); + + ValueIDNum LiveInRsp(0, 0, RspLoc); + ValueIDNum RspPHIInBlk1(1, 0, RspLoc); + ValueIDNum RspDefInBlk1(1, 1, RspLoc); + ValueIDNum RspPHIInBlk2(2, 0, RspLoc); + ValueIDNum RspDefInBlk2(2, 1, RspLoc); + ValueIDNum RspDefInBlk3(3, 1, RspLoc); + ValueIDNum LiveInRax(0, 0, RaxLoc); + ValueIDNum RaxPHIInBlk1(1, 0, RaxLoc); + ValueIDNum RaxPHIInBlk2(2, 0, RaxLoc); + + // Like the other tests: first ensure that if there's nothing in the transfer + // function, then everything is live-through (check $rsp). + initValueArray(InLocsPtr, 5, 2); + initValueArray(OutLocsPtr, 5, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], LiveInRsp); + EXPECT_EQ(InLocs[2][0], LiveInRsp); + EXPECT_EQ(InLocs[3][0], LiveInRsp); + EXPECT_EQ(InLocs[4][0], LiveInRsp); + EXPECT_EQ(OutLocs[0][0], LiveInRsp); + EXPECT_EQ(OutLocs[1][0], LiveInRsp); + EXPECT_EQ(OutLocs[2][0], LiveInRsp); + EXPECT_EQ(OutLocs[3][0], LiveInRsp); + EXPECT_EQ(OutLocs[4][0], LiveInRsp); + + // A def in the inner loop means we should get PHIs at the heads of both + // loops. Live-outs of the last three blocks will be the def, as it dominates + // those. + TransferFunc[2].insert({RspLoc, RspDefInBlk2}); + initValueArray(InLocsPtr, 5, 2); + initValueArray(OutLocsPtr, 5, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], RspPHIInBlk1); + EXPECT_EQ(InLocs[2][0], RspPHIInBlk2); + EXPECT_EQ(InLocs[3][0], RspDefInBlk2); + EXPECT_EQ(InLocs[4][0], RspDefInBlk2); + EXPECT_EQ(OutLocs[0][0], LiveInRsp); + EXPECT_EQ(OutLocs[1][0], RspPHIInBlk1); + EXPECT_EQ(OutLocs[2][0], RspDefInBlk2); + EXPECT_EQ(OutLocs[3][0], RspDefInBlk2); + EXPECT_EQ(OutLocs[4][0], RspDefInBlk2); + TransferFunc[2].clear(); + + // Adding a def to the outer loop header shouldn't affect this much -- the + // live-out of block 1 changes. + TransferFunc[1].insert({RspLoc, RspDefInBlk1}); + TransferFunc[2].insert({RspLoc, RspDefInBlk2}); + initValueArray(InLocsPtr, 5, 2); + initValueArray(OutLocsPtr, 5, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], RspPHIInBlk1); + EXPECT_EQ(InLocs[2][0], RspPHIInBlk2); + EXPECT_EQ(InLocs[3][0], RspDefInBlk2); + EXPECT_EQ(InLocs[4][0], RspDefInBlk2); + EXPECT_EQ(OutLocs[0][0], LiveInRsp); + EXPECT_EQ(OutLocs[1][0], RspDefInBlk1); + EXPECT_EQ(OutLocs[2][0], RspDefInBlk2); + EXPECT_EQ(OutLocs[3][0], RspDefInBlk2); + EXPECT_EQ(OutLocs[4][0], RspDefInBlk2); + TransferFunc[1].clear(); + TransferFunc[2].clear(); + + // Likewise, putting a def in the outer loop tail shouldn't affect where + // the PHIs go, and should propagate into the ret block. + + TransferFunc[1].insert({RspLoc, RspDefInBlk1}); + TransferFunc[2].insert({RspLoc, RspDefInBlk2}); + TransferFunc[3].insert({RspLoc, RspDefInBlk3}); + initValueArray(InLocsPtr, 5, 2); + initValueArray(OutLocsPtr, 5, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], RspPHIInBlk1); + EXPECT_EQ(InLocs[2][0], RspPHIInBlk2); + EXPECT_EQ(InLocs[3][0], RspDefInBlk2); + EXPECT_EQ(InLocs[4][0], RspDefInBlk3); + EXPECT_EQ(OutLocs[0][0], LiveInRsp); + EXPECT_EQ(OutLocs[1][0], RspDefInBlk1); + EXPECT_EQ(OutLocs[2][0], RspDefInBlk2); + EXPECT_EQ(OutLocs[3][0], RspDefInBlk3); + EXPECT_EQ(OutLocs[4][0], RspDefInBlk3); + TransferFunc[1].clear(); + TransferFunc[2].clear(); + TransferFunc[3].clear(); + + // However: if we don't def in the inner-loop, then we just have defs in the + // head and tail of the outer loop. The inner loop should be live-through. + TransferFunc[1].insert({RspLoc, RspDefInBlk1}); + TransferFunc[3].insert({RspLoc, RspDefInBlk3}); + initValueArray(InLocsPtr, 5, 2); + initValueArray(OutLocsPtr, 5, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], RspPHIInBlk1); + EXPECT_EQ(InLocs[2][0], RspDefInBlk1); + EXPECT_EQ(InLocs[3][0], RspDefInBlk1); + EXPECT_EQ(InLocs[4][0], RspDefInBlk3); + EXPECT_EQ(OutLocs[0][0], LiveInRsp); + EXPECT_EQ(OutLocs[1][0], RspDefInBlk1); + EXPECT_EQ(OutLocs[2][0], RspDefInBlk1); + EXPECT_EQ(OutLocs[3][0], RspDefInBlk3); + EXPECT_EQ(OutLocs[4][0], RspDefInBlk3); + TransferFunc[1].clear(); + TransferFunc[3].clear(); + + // Check that this still works if we copy RspDefInBlk1 to $rax and then + // copy it back into $rsp in the inner loop. + TransferFunc[1].insert({RspLoc, RspDefInBlk1}); + TransferFunc[1].insert({RaxLoc, RspDefInBlk1}); + TransferFunc[2].insert({RspLoc, RaxPHIInBlk2}); + TransferFunc[3].insert({RspLoc, RspDefInBlk3}); + initValueArray(InLocsPtr, 5, 2); + initValueArray(OutLocsPtr, 5, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], RspPHIInBlk1); + EXPECT_EQ(InLocs[2][0], RspDefInBlk1); + EXPECT_EQ(InLocs[3][0], RspDefInBlk1); + EXPECT_EQ(InLocs[4][0], RspDefInBlk3); + EXPECT_EQ(OutLocs[0][0], LiveInRsp); + EXPECT_EQ(OutLocs[1][0], RspDefInBlk1); + EXPECT_EQ(OutLocs[2][0], RspDefInBlk1); + EXPECT_EQ(OutLocs[3][0], RspDefInBlk3); + EXPECT_EQ(OutLocs[4][0], RspDefInBlk3); + // Look at raxes value in the relevant blocks, + EXPECT_EQ(InLocs[2][1], RspDefInBlk1); + EXPECT_EQ(OutLocs[1][1], RspDefInBlk1); + TransferFunc[1].clear(); + TransferFunc[2].clear(); + TransferFunc[3].clear(); + + // If we have a single def in the tail of the outer loop, that should produce + // a PHI at the loop head, and be live-through the inner loop. + TransferFunc[3].insert({RspLoc, RspDefInBlk3}); + initValueArray(InLocsPtr, 5, 2); + initValueArray(OutLocsPtr, 5, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], RspPHIInBlk1); + EXPECT_EQ(InLocs[2][0], RspPHIInBlk1); + EXPECT_EQ(InLocs[3][0], RspPHIInBlk1); + EXPECT_EQ(InLocs[4][0], RspDefInBlk3); + EXPECT_EQ(OutLocs[0][0], LiveInRsp); + EXPECT_EQ(OutLocs[1][0], RspPHIInBlk1); + EXPECT_EQ(OutLocs[2][0], RspPHIInBlk1); + EXPECT_EQ(OutLocs[3][0], RspDefInBlk3); + EXPECT_EQ(OutLocs[4][0], RspDefInBlk3); + TransferFunc[3].clear(); + + // And if we copy from $rsp to $rax in block 2, it should resolve to the PHI + // in block 1, and we should keep that value in rax until the ret block. + // There'll be a PHI in block 1 and 2, because we're putting a def in the + // inner loop. + TransferFunc[2].insert({RaxLoc, RspPHIInBlk2}); + TransferFunc[3].insert({RspLoc, RspDefInBlk3}); + initValueArray(InLocsPtr, 5, 2); + initValueArray(OutLocsPtr, 5, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + // Examining the values of rax, + EXPECT_EQ(InLocs[0][1], LiveInRax); + EXPECT_EQ(InLocs[1][1], RaxPHIInBlk1); + EXPECT_EQ(InLocs[2][1], RaxPHIInBlk2); + EXPECT_EQ(InLocs[3][1], RspPHIInBlk1); + EXPECT_EQ(InLocs[4][1], RspPHIInBlk1); + EXPECT_EQ(OutLocs[0][1], LiveInRax); + EXPECT_EQ(OutLocs[1][1], RaxPHIInBlk1); + EXPECT_EQ(OutLocs[2][1], RspPHIInBlk1); + EXPECT_EQ(OutLocs[3][1], RspPHIInBlk1); + EXPECT_EQ(OutLocs[4][1], RspPHIInBlk1); + TransferFunc[2].clear(); + TransferFunc[3].clear(); +} + +TEST_F(InstrRefLDVTest, MLocNoDominatingLoop) { + // entry + // / \ + // / \ + // / \ + // head1 head2 + // ^ \ / ^ + // ^ \ / ^ + // \-joinblk -/ + // | + // ret + llvm::Function &F = const_cast(MF->getFunction()); + auto *BB1 = BasicBlock::Create(Ctx, "entry", &F); + auto *BB2 = BasicBlock::Create(Ctx, "head1", &F); + auto *BB3 = BasicBlock::Create(Ctx, "head2", &F); + auto *BB4 = BasicBlock::Create(Ctx, "joinblk", &F); + auto *BB5 = BasicBlock::Create(Ctx, "ret", &F); + IRBuilder<> IRB1(BB1), IRB2(BB2), IRB3(BB3), IRB4(BB4), IRB5(BB5); + IRB1.CreateBr(BB2); + IRB2.CreateBr(BB3); + IRB3.CreateBr(BB4); + IRB4.CreateBr(BB5); + IRB5.CreateRetVoid(); + MBB1 = MF->CreateMachineBasicBlock(BB1); + MF->insert(MF->end(), MBB1); + MBB2 = MF->CreateMachineBasicBlock(BB2); + MF->insert(MF->end(), MBB2); + MBB3 = MF->CreateMachineBasicBlock(BB3); + MF->insert(MF->end(), MBB3); + MBB4 = MF->CreateMachineBasicBlock(BB4); + MF->insert(MF->end(), MBB4); + MBB5 = MF->CreateMachineBasicBlock(BB5); + MF->insert(MF->end(), MBB5); + MBB1->addSuccessor(MBB2); + MBB1->addSuccessor(MBB3); + MBB2->addSuccessor(MBB4); + MBB3->addSuccessor(MBB4); + MBB4->addSuccessor(MBB2); + MBB4->addSuccessor(MBB3); + MBB4->addSuccessor(MBB5); + MF->RenumberBlocks(); + + setupLDVObj(); + + ASSERT_TRUE(MTracker->getNumLocs() == 1); + LocIdx RspLoc(0); + Register RAX = getRegByName("RAX"); + LocIdx RaxLoc = MTracker->lookupOrTrackRegister(RAX); + + ValueIDNum InLocs[5][2], OutLocs[5][2]; + ValueIDNum *InLocsPtr[5] = {InLocs[0], InLocs[1], InLocs[2], InLocs[3], + InLocs[4]}; + ValueIDNum *OutLocsPtr[5] = {OutLocs[0], OutLocs[1], OutLocs[2], OutLocs[3], + OutLocs[4]}; + + SmallVector TransferFunc; + TransferFunc.resize(5); + + ValueIDNum LiveInRsp(0, 0, RspLoc); + ValueIDNum RspPHIInBlk1(1, 0, RspLoc); + ValueIDNum RspDefInBlk1(1, 1, RspLoc); + ValueIDNum RspPHIInBlk2(2, 0, RspLoc); + ValueIDNum RspDefInBlk2(2, 1, RspLoc); + ValueIDNum RspPHIInBlk3(3, 0, RspLoc); + ValueIDNum RspDefInBlk3(3, 1, RspLoc); + ValueIDNum RaxPHIInBlk1(1, 0, RaxLoc); + ValueIDNum RaxPHIInBlk2(2, 0, RaxLoc); + + // As ever, test that everything is live-through if there are no defs. + initValueArray(InLocsPtr, 5, 2); + initValueArray(OutLocsPtr, 5, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], LiveInRsp); + EXPECT_EQ(InLocs[2][0], LiveInRsp); + EXPECT_EQ(InLocs[3][0], LiveInRsp); + EXPECT_EQ(InLocs[4][0], LiveInRsp); + EXPECT_EQ(OutLocs[0][0], LiveInRsp); + EXPECT_EQ(OutLocs[1][0], LiveInRsp); + EXPECT_EQ(OutLocs[2][0], LiveInRsp); + EXPECT_EQ(OutLocs[3][0], LiveInRsp); + EXPECT_EQ(OutLocs[4][0], LiveInRsp); + + // Putting a def in the 'join' block will cause us to have two distinct + // PHIs in each loop head, then on entry to the join block. + TransferFunc[3].insert({RspLoc, RspDefInBlk3}); + initValueArray(InLocsPtr, 5, 2); + initValueArray(OutLocsPtr, 5, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], RspPHIInBlk1); + EXPECT_EQ(InLocs[2][0], RspPHIInBlk2); + EXPECT_EQ(InLocs[3][0], RspPHIInBlk3); + EXPECT_EQ(InLocs[4][0], RspDefInBlk3); + EXPECT_EQ(OutLocs[0][0], LiveInRsp); + EXPECT_EQ(OutLocs[1][0], RspPHIInBlk1); + EXPECT_EQ(OutLocs[2][0], RspPHIInBlk2); + EXPECT_EQ(OutLocs[3][0], RspDefInBlk3); + EXPECT_EQ(OutLocs[4][0], RspDefInBlk3); + TransferFunc[3].clear(); + + // We should get the same behaviour if we put the def in either of the + // loop heads -- it should force the other head to be a PHI. + TransferFunc[1].insert({RspLoc, RspDefInBlk1}); + initValueArray(InLocsPtr, 5, 2); + initValueArray(OutLocsPtr, 5, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], RspPHIInBlk1); + EXPECT_EQ(InLocs[2][0], RspPHIInBlk2); + EXPECT_EQ(InLocs[3][0], RspPHIInBlk3); + EXPECT_EQ(InLocs[4][0], RspPHIInBlk3); + EXPECT_EQ(OutLocs[0][0], LiveInRsp); + EXPECT_EQ(OutLocs[1][0], RspDefInBlk1); + EXPECT_EQ(OutLocs[2][0], RspPHIInBlk2); + EXPECT_EQ(OutLocs[3][0], RspPHIInBlk3); + EXPECT_EQ(OutLocs[4][0], RspPHIInBlk3); + TransferFunc[1].clear(); + + // Check symmetry, + TransferFunc[2].insert({RspLoc, RspDefInBlk2}); + initValueArray(InLocsPtr, 5, 2); + initValueArray(OutLocsPtr, 5, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], RspPHIInBlk1); + EXPECT_EQ(InLocs[2][0], RspPHIInBlk2); + EXPECT_EQ(InLocs[3][0], RspPHIInBlk3); + EXPECT_EQ(InLocs[4][0], RspPHIInBlk3); + EXPECT_EQ(OutLocs[0][0], LiveInRsp); + EXPECT_EQ(OutLocs[1][0], RspPHIInBlk1); + EXPECT_EQ(OutLocs[2][0], RspDefInBlk2); + EXPECT_EQ(OutLocs[3][0], RspPHIInBlk3); + EXPECT_EQ(OutLocs[4][0], RspPHIInBlk3); + TransferFunc[2].clear(); + + // Test some scenarios where there _shouldn't_ be any PHIs created at heads. + // These are those PHIs are created, but value propagation eliminates them. + // For example, lets copy rsp-livein to $rsp inside each loop head, so that + // there's no need for a PHI in the join block. Put a def of $rsp in block 3 + // to force PHIs elsewhere. + TransferFunc[0].insert({RaxLoc, LiveInRsp}); + TransferFunc[1].insert({RspLoc, RaxPHIInBlk1}); + TransferFunc[2].insert({RspLoc, RaxPHIInBlk2}); + TransferFunc[3].insert({RspLoc, RspDefInBlk3}); + initValueArray(InLocsPtr, 5, 2); + initValueArray(OutLocsPtr, 5, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], RspPHIInBlk1); + EXPECT_EQ(InLocs[2][0], RspPHIInBlk2); + EXPECT_EQ(InLocs[3][0], LiveInRsp); + EXPECT_EQ(InLocs[4][0], RspDefInBlk3); + EXPECT_EQ(OutLocs[0][0], LiveInRsp); + EXPECT_EQ(OutLocs[1][0], LiveInRsp); + EXPECT_EQ(OutLocs[2][0], LiveInRsp); + EXPECT_EQ(OutLocs[3][0], RspDefInBlk3); + EXPECT_EQ(OutLocs[4][0], RspDefInBlk3); + TransferFunc[0].clear(); + TransferFunc[1].clear(); + TransferFunc[2].clear(); + TransferFunc[3].clear(); + + // In fact, if we eliminate the def in block 3, none of those PHIs are + // necessary, as we're just repeatedly copying LiveInRsp into $rsp. They + // should all be value propagated out. + TransferFunc[0].insert({RaxLoc, LiveInRsp}); + TransferFunc[1].insert({RspLoc, RaxPHIInBlk1}); + TransferFunc[2].insert({RspLoc, RaxPHIInBlk2}); + initValueArray(InLocsPtr, 5, 2); + initValueArray(OutLocsPtr, 5, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], LiveInRsp); + EXPECT_EQ(InLocs[2][0], LiveInRsp); + EXPECT_EQ(InLocs[3][0], LiveInRsp); + EXPECT_EQ(InLocs[4][0], LiveInRsp); + EXPECT_EQ(OutLocs[0][0], LiveInRsp); + EXPECT_EQ(OutLocs[1][0], LiveInRsp); + EXPECT_EQ(OutLocs[2][0], LiveInRsp); + EXPECT_EQ(OutLocs[3][0], LiveInRsp); + EXPECT_EQ(OutLocs[4][0], LiveInRsp); + TransferFunc[0].clear(); + TransferFunc[1].clear(); + TransferFunc[2].clear(); +} + +TEST_F(InstrRefLDVTest, MLocBadlyNestedLoops) { + // entry + // | + // loop1 -o + // | ^ + // | ^ + // loop2 -o + // | ^ + // | ^ + // loop3 -o + // | + // ret + // + // NB: the loop blocks self-loop, which is a bit too fiddly to draw on + // accurately. + llvm::Function &F = const_cast(MF->getFunction()); + auto *BB1 = BasicBlock::Create(Ctx, "entry", &F); + auto *BB2 = BasicBlock::Create(Ctx, "loop1", &F); + auto *BB3 = BasicBlock::Create(Ctx, "loop2", &F); + auto *BB4 = BasicBlock::Create(Ctx, "loop3", &F); + auto *BB5 = BasicBlock::Create(Ctx, "ret", &F); + IRBuilder<> IRB1(BB1), IRB2(BB2), IRB3(BB3), IRB4(BB4), IRB5(BB5); + IRB1.CreateBr(BB2); + IRB2.CreateBr(BB3); + IRB3.CreateBr(BB4); + IRB4.CreateBr(BB5); + IRB5.CreateRetVoid(); + MBB1 = MF->CreateMachineBasicBlock(BB1); + MF->insert(MF->end(), MBB1); + MBB2 = MF->CreateMachineBasicBlock(BB2); + MF->insert(MF->end(), MBB2); + MBB3 = MF->CreateMachineBasicBlock(BB3); + MF->insert(MF->end(), MBB3); + MBB4 = MF->CreateMachineBasicBlock(BB4); + MF->insert(MF->end(), MBB4); + MBB5 = MF->CreateMachineBasicBlock(BB5); + MF->insert(MF->end(), MBB5); + MBB1->addSuccessor(MBB2); + MBB2->addSuccessor(MBB2); + MBB2->addSuccessor(MBB3); + MBB3->addSuccessor(MBB2); + MBB3->addSuccessor(MBB3); + MBB3->addSuccessor(MBB4); + MBB4->addSuccessor(MBB3); + MBB4->addSuccessor(MBB4); + MBB4->addSuccessor(MBB5); + MF->RenumberBlocks(); + + setupLDVObj(); + + ASSERT_TRUE(MTracker->getNumLocs() == 1); + LocIdx RspLoc(0); + Register RAX = getRegByName("RAX"); + LocIdx RaxLoc = MTracker->lookupOrTrackRegister(RAX); + + ValueIDNum InLocs[5][2], OutLocs[5][2]; + ValueIDNum *InLocsPtr[5] = {InLocs[0], InLocs[1], InLocs[2], InLocs[3], + InLocs[4]}; + ValueIDNum *OutLocsPtr[5] = {OutLocs[0], OutLocs[1], OutLocs[2], OutLocs[3], + OutLocs[4]}; + + SmallVector TransferFunc; + TransferFunc.resize(5); + + ValueIDNum LiveInRsp(0, 0, RspLoc); + ValueIDNum RspPHIInBlk1(1, 0, RspLoc); + ValueIDNum RspDefInBlk1(1, 1, RspLoc); + ValueIDNum RspPHIInBlk2(2, 0, RspLoc); + ValueIDNum RspPHIInBlk3(3, 0, RspLoc); + ValueIDNum RspDefInBlk3(3, 1, RspLoc); + ValueIDNum LiveInRax(0, 0, RaxLoc); + ValueIDNum RaxPHIInBlk3(3, 0, RaxLoc); + + // As ever, test that everything is live-through if there are no defs. + initValueArray(InLocsPtr, 5, 2); + initValueArray(OutLocsPtr, 5, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], LiveInRsp); + EXPECT_EQ(InLocs[2][0], LiveInRsp); + EXPECT_EQ(InLocs[3][0], LiveInRsp); + EXPECT_EQ(InLocs[4][0], LiveInRsp); + EXPECT_EQ(OutLocs[0][0], LiveInRsp); + EXPECT_EQ(OutLocs[1][0], LiveInRsp); + EXPECT_EQ(OutLocs[2][0], LiveInRsp); + EXPECT_EQ(OutLocs[3][0], LiveInRsp); + EXPECT_EQ(OutLocs[4][0], LiveInRsp); + + // A def in loop3 should cause PHIs in every loop block: they're all + // reachable from each other. + TransferFunc[3].insert({RspLoc, RspDefInBlk3}); + initValueArray(InLocsPtr, 5, 2); + initValueArray(OutLocsPtr, 5, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], RspPHIInBlk1); + EXPECT_EQ(InLocs[2][0], RspPHIInBlk2); + EXPECT_EQ(InLocs[3][0], RspPHIInBlk3); + EXPECT_EQ(InLocs[4][0], RspDefInBlk3); + EXPECT_EQ(OutLocs[0][0], LiveInRsp); + EXPECT_EQ(OutLocs[1][0], RspPHIInBlk1); + EXPECT_EQ(OutLocs[2][0], RspPHIInBlk2); + EXPECT_EQ(OutLocs[3][0], RspDefInBlk3); + EXPECT_EQ(OutLocs[4][0], RspDefInBlk3); + TransferFunc[3].clear(); + + // A def in loop1 should cause a PHI in loop1, but not the other blocks. + // loop2 and loop3 are dominated by the def in loop1, so they should have + // that value live-through. + TransferFunc[1].insert({RspLoc, RspDefInBlk1}); + initValueArray(InLocsPtr, 5, 2); + initValueArray(OutLocsPtr, 5, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], RspPHIInBlk1); + EXPECT_EQ(InLocs[2][0], RspDefInBlk1); + EXPECT_EQ(InLocs[3][0], RspDefInBlk1); + EXPECT_EQ(InLocs[4][0], RspDefInBlk1); + EXPECT_EQ(OutLocs[0][0], LiveInRsp); + EXPECT_EQ(OutLocs[1][0], RspDefInBlk1); + EXPECT_EQ(OutLocs[2][0], RspDefInBlk1); + EXPECT_EQ(OutLocs[3][0], RspDefInBlk1); + EXPECT_EQ(OutLocs[4][0], RspDefInBlk1); + TransferFunc[1].clear(); + + // As with earlier tricks: copy $rsp to $rax in the entry block, then $rax + // to $rsp in block 3. The only def of $rsp is simply copying the same value + // back into itself, and the value of $rsp is LiveInRsp all the way through. + // PHIs should be created, then value-propagated away... however this + // doesn't work in practice. + // Consider the entry to loop3: we can determine that there's an incoming + // PHI value from loop2, and LiveInRsp from the self-loop. This would still + // justify having a PHI on entry to loop3. The only way to completely + // value-propagate these PHIs away would be to speculatively explore what + // PHIs could be eliminated and what that would lead to; which is + // combinatorially complex. + // Happily: + // a) In this scenario, we always have a tracked location for LiveInRsp + // anyway, so there's no loss in availability, + // b) Only DBG_PHIs of a register would be vunlerable to this scenario, and + // even then only if a true PHI became a DBG_PHI and was then optimised + // through branch folding to no longer be at a CFG join, + // c) The register allocator can spot this kind of redundant COPY easily, + // and eliminate it. + // + // This unit test left in as a reference for the limitations of this + // approach. PHIs will be left in $rsp on entry to each block. + TransferFunc[0].insert({RaxLoc, LiveInRsp}); + TransferFunc[3].insert({RspLoc, RaxPHIInBlk3}); + initValueArray(InLocsPtr, 5, 2); + initValueArray(OutLocsPtr, 5, 2); + buildMLocValueMap(InLocsPtr, OutLocsPtr, TransferFunc); + EXPECT_EQ(InLocs[0][0], LiveInRsp); + EXPECT_EQ(InLocs[1][0], RspPHIInBlk1); + EXPECT_EQ(InLocs[2][0], RspPHIInBlk2); + EXPECT_EQ(InLocs[3][0], RspPHIInBlk3); + EXPECT_EQ(InLocs[4][0], LiveInRsp); + EXPECT_EQ(OutLocs[0][0], LiveInRsp); + EXPECT_EQ(OutLocs[1][0], RspPHIInBlk1); + EXPECT_EQ(OutLocs[2][0], RspPHIInBlk2); + EXPECT_EQ(OutLocs[3][0], LiveInRsp); + EXPECT_EQ(OutLocs[4][0], LiveInRsp); + // Check $rax's value. It should have $rsps value from the entry block + // onwards. + EXPECT_EQ(InLocs[0][1], LiveInRax); + EXPECT_EQ(InLocs[1][1], LiveInRsp); + EXPECT_EQ(InLocs[2][1], LiveInRsp); + EXPECT_EQ(InLocs[3][1], LiveInRsp); + EXPECT_EQ(InLocs[4][1], LiveInRsp); + EXPECT_EQ(OutLocs[0][1], LiveInRsp); + EXPECT_EQ(OutLocs[1][1], LiveInRsp); + EXPECT_EQ(OutLocs[2][1], LiveInRsp); + EXPECT_EQ(OutLocs[3][1], LiveInRsp); + EXPECT_EQ(OutLocs[4][1], LiveInRsp); +}