Index: lib/CodeGen/InlineSpiller.cpp =================================================================== --- lib/CodeGen/InlineSpiller.cpp +++ lib/CodeGen/InlineSpiller.cpp @@ -1387,6 +1387,23 @@ if (Ent.second.empty()) continue; + // Splitting live range of a partially defined register may actually + // incorporate new instructions into the live ranges after splitting. + // This is because even though the register may only be partially + // defined, the live range boundary will be a copy/spill using the + // entire register. If that happens in a loop, a new live range may + // contain segments which were not even partially defined in the + // original interval. Spills corresponding to those ranges may not + // have any original VNI associated with them, and since they are + // storing undefined values, they can be safely removed. + if (OrigVNI == nullptr) { + for (auto *MI : EqValSpills) { + LIS.RemoveMachineInstrFromMaps(*MI); + MI->eraseFromParent(); + } + continue; + } + DEBUG({ dbgs() << "\nFor Slot" << Slot << " and VN" << OrigVNI->id << ":\n" << "Equal spills in BB: "; Index: lib/CodeGen/LiveIntervalAnalysis.cpp =================================================================== --- lib/CodeGen/LiveIntervalAnalysis.cpp +++ lib/CodeGen/LiveIntervalAnalysis.cpp @@ -352,7 +352,7 @@ static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes, ShrinkToUsesWorkList &WorkList, - const LiveRange &OldRange) { + const LiveRange &OldRange, bool AllowUndef) { // Keep track of the PHIs that are in use. SmallPtrSet UsedPHIs; // Blocks that have already been added to WorkList as live-out. @@ -395,6 +395,8 @@ if (!LiveOut.insert(Pred).second) continue; SlotIndex Stop = Indexes.getMBBEndIdx(Pred); + if (AllowUndef && OldRange.getVNInfoBefore(Stop) == nullptr) + continue; assert(OldRange.getVNInfoBefore(Stop) == VNI && "Wrong value out of predecessor"); WorkList.push_back(std::make_pair(Stop, VNI)); @@ -421,21 +423,41 @@ // Find all the values used, including PHI kills. ShrinkToUsesWorkList WorkList; + auto liveSub = [li] (SlotIndex Idx) -> bool { + for (LiveInterval::SubRange &S : li->subranges()) + if (S.liveAt(Idx.getPrevSlot())) + return true; + return false; + }; + // Visit all instructions reading li->reg. - for (MachineRegisterInfo::reg_instr_iterator - I = MRI->reg_instr_begin(li->reg), E = MRI->reg_instr_end(); - I != E; ) { - MachineInstr *UseMI = &*(I++); - if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg)) + SlotIndex LastIdx; + for (MachineOperand &MO : MRI->reg_operands(li->reg)) { + MachineInstr &UseMI = *MO.getParent(); + if (UseMI.isDebugValue()) continue; - SlotIndex Idx = getInstructionIndex(*UseMI).getRegSlot(); + SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot(); + if (Idx == LastIdx) + continue; + LastIdx = Idx; + + if (!UseMI.readsVirtualRegister(li->reg)) { + // An instruction + // %vreg0:sub1 = ... + // does not read vreg0, but we can't just skip it, since it will + // appear as a def of vreg0. If vreg0:sub0 has been defined and is + // not dead, skipping this instruction would cause the prior def + // to appear dead in the main live range for vreg0. + if (!liveSub(Idx)) + continue; + } LiveQueryResult LRQ = li->Query(Idx); VNInfo *VNI = LRQ.valueIn(); if (!VNI) { // This shouldn't happen: readsVirtualRegister returns true, but there is // no live value. It is likely caused by a target getting flags // wrong. - DEBUG(dbgs() << Idx << '\t' << *UseMI + DEBUG(dbgs() << Idx << '\t' << UseMI << "Warning: Instr claims to read non-existent value in " << *li << '\n'); continue; @@ -451,7 +473,7 @@ // Create new live ranges with only minimal live segments per def. LiveRange NewLR; createSegmentsForValues(NewLR, make_range(li->vni_begin(), li->vni_end())); - extendSegmentsToUses(NewLR, *Indexes, WorkList, *li); + extendSegmentsToUses(NewLR, *Indexes, WorkList, *li, false); // Move the trimmed segments back. li->segments.swap(NewLR.segments); @@ -506,7 +528,8 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) { - DEBUG(dbgs() << "Shrink: " << SR << '\n'); + DEBUG(dbgs() << "Shrink: L" << PrintLaneMask(SR.LaneMask) + << ' ' << SR << '\n'); assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Can only shrink virtual registers"); // Find all the values used, including PHI kills. @@ -516,12 +539,14 @@ SlotIndex LastIdx; for (MachineOperand &MO : MRI->reg_operands(Reg)) { MachineInstr *UseMI = MO.getParent(); - if (UseMI->isDebugValue()) + if (UseMI->isDebugValue() || !MO.readsReg()) continue; // Maybe the operand is for a subregister we don't care about. unsigned SubReg = MO.getSubReg(); if (SubReg != 0) { LaneBitmask LaneMask = TRI->getSubRegIndexLaneMask(SubReg); + if (MO.isDef() && !MO.isUndef()) + LaneMask = ~LaneMask & MRI->getMaxLaneMaskForVReg(Reg); if ((LaneMask & SR.LaneMask) == 0) continue; } @@ -549,7 +574,7 @@ // Create a new live ranges with only minimal live segments per def. LiveRange NewLR; createSegmentsForValues(NewLR, make_range(SR.vni_begin(), SR.vni_end())); - extendSegmentsToUses(NewLR, *Indexes, WorkList, SR); + extendSegmentsToUses(NewLR, *Indexes, WorkList, SR, true); // Move the trimmed ranges back. SR.segments.swap(NewLR.segments); @@ -564,9 +589,9 @@ continue; if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. + DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); VNI->markUnused(); SR.removeSegment(*Segment); - DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); } } @@ -1538,10 +1563,11 @@ } void LiveIntervals::removeVRegDefAt(LiveInterval &LI, SlotIndex Pos) { + // LI may not have the main range computed yet, but its subranges may + // be present. VNInfo *VNI = LI.getVNInfoAt(Pos); - if (VNI == nullptr) - return; - LI.removeValNo(VNI); + if (VNI != nullptr) + LI.removeValNo(VNI); // Also remove the value in subranges. for (LiveInterval::SubRange &S : LI.subranges()) { Index: lib/CodeGen/LiveRangeCalc.h =================================================================== --- lib/CodeGen/LiveRangeCalc.h +++ lib/CodeGen/LiveRangeCalc.h @@ -24,6 +24,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/LiveInterval.h" namespace llvm { @@ -53,6 +54,10 @@ /// when switching live ranges. BitVector Seen; + /// Bit vector of blocks such that there is a path from at least one of + /// the defs in the live range to the entry of the block. + BitVector DefOnEntry; + /// Map each basic block where a live range is live out to the live-out value /// and its defining block. /// @@ -101,18 +106,31 @@ /// used to add entries directly. SmallVector LiveIn; - /// Assuming that @p LR is live-in to @p UseMBB, find the set of defs that can - /// reach it. + /// Check if the entry to block @p MBB can be reached by any of the defs + /// in @p LR. Return true if none of the defs reach the entry to @p MBB. + bool isDefOnEntry(LiveRange &LR, MachineBasicBlock &MBB); + + /// Find the set of defs that can reach @p Kill. @p Kill must belong to + /// @p UseMBB. /// - /// If only one def can reach @p UseMBB, all paths from the def to @p UseMBB - /// are added to @p LR, and the function returns true. + /// If exactly one def can reach @p UseMBB, and the def dominates @p Kill, + /// all paths from the def to @p UseMBB are added to @p LR, and the function + /// returns true. /// /// If multiple values can reach @p UseMBB, the blocks that need @p LR to be /// live in are added to the LiveIn array, and the function returns false. /// + /// Unless @p AllowUndef is set, it is assumed that @p LR is live at @p Kill. + /// If @p AllowUndef is set and @p Kill can be reached from entry without + /// encountering any def, the function returns false. Allowing @p Kill to be + /// undefined helps when extending subranges of a live interval: while the + /// register as a whole may be live at @p Kill, a particular subregister may + /// be undefined. Whether that is the case or not is often unknown until the + /// reaching defs are actually located. + /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, - SlotIndex Kill, unsigned PhysReg); + SlotIndex Kill, unsigned PhysReg, bool AllowUndef); /// updateSSA - Compute the values that will be live in to all requested /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form. @@ -127,9 +145,23 @@ /// Extend the live range of @p LR to reach all uses of Reg. /// - /// All uses must be jointly dominated by existing liveness. PHI-defs are - /// inserted as needed to preserve SSA form. - void extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask); + /// All uses must be jointly dominated by existing liveness, unless + /// @p AllowUndef is true.. PHI-defs are inserted as needed to preserve + /// SSA form. The set @p LiveAt optionally contains indexes where the + /// register is known to be live. The motivation for this are read-undef + /// definitions of Reg's subregisters. Consider the following: + /// Reg:sub0 = ... + /// ... + /// Reg:sub1 = ... + /// The second def does not read Reg, and by looking at machine operands + /// alone, the main live range for Reg would seem to have two defs, the + /// first of which does not reach any use. It would then appear to be + /// dead, while in fact, the sub0 lane is actually live. Inserting the + /// location of the second def into the @p LiveAt set informs this + /// function that Reg is live at that location. + void extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask, + const SmallSet &LiveAt = SmallSet(), + bool AllowUndef = false); /// Reset Map and Seen fields. void resetLiveOutMap(); @@ -169,7 +201,8 @@ /// inserted as required to preserve SSA form. /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. - void extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg = 0); + void extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg = 0, + bool AllowUndef = false); /// createDeadDefs - Create a dead def in LI for every def operand of Reg. /// Each instruction defining Reg gets a new VNInfo with a corresponding Index: lib/CodeGen/LiveRangeCalc.cpp =================================================================== --- lib/CodeGen/LiveRangeCalc.cpp +++ lib/CodeGen/LiveRangeCalc.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "LiveRangeCalc.h" +#include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -23,6 +24,8 @@ unsigned NumBlocks = MF->getNumBlockIDs(); Seen.clear(); Seen.resize(NumBlocks); + DefOnEntry.clear(); + DefOnEntry.resize(NumBlocks); Map.resize(NumBlocks); } @@ -133,15 +136,27 @@ assert(MainRange.segments.empty() && MainRange.valnos.empty() && "Expect empty main liverange"); + auto liveSub = [&LI] (LaneBitmask Skip, SlotIndex Idx) -> bool { + SlotIndex Prev = Idx.getPrevSlot(); + for (const LiveInterval::SubRange &SR : LI.subranges()) + if (SR.LaneMask != Skip && SR.liveAt(Prev)) + return true; + return false; + }; + + SmallSet LiveAt; + for (const LiveInterval::SubRange &SR : LI.subranges()) { for (const VNInfo *VNI : SR.valnos) { - if (!VNI->isUnused() && !VNI->isPHIDef()) - MainRange.createDeadDef(VNI->def, *Alloc); + if (VNI->isUnused() || VNI->isPHIDef()) + continue; + MainRange.createDeadDef(VNI->def, *Alloc); + if (liveSub(SR.LaneMask, VNI->def)) + LiveAt.insert(VNI->def.getBaseIndex()); } } - resetLiveOutMap(); - extendToUses(MainRange, LI.reg); + extendToUses(MainRange, LI.reg, ~0U, LiveAt, true); } void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) { @@ -154,8 +169,9 @@ } -void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, - LaneBitmask Mask) { +void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask Mask, + const SmallSet &LiveAt, + bool AllowUndef) { // Visit all operands that read Reg. This may include partial defs. const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo(); for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) { @@ -170,8 +186,11 @@ continue; } - if (!MO.readsReg()) - continue; + if (!MO.readsReg()) { + SlotIndex Idx = Indexes->getInstructionIndex(*MO.getParent()); + if (!LiveAt.count(Idx.getBaseIndex())) + continue; + } unsigned SubReg = MO.getSubReg(); if (SubReg != 0) { LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg); @@ -205,7 +224,7 @@ // MI is reading Reg. We may have visited MI before if it happens to be // reading Reg multiple times. That is OK, extend() is idempotent. - extend(LR, UseIdx, Reg); + extend(LR, UseIdx, Reg, AllowUndef || !LiveAt.empty()); } } @@ -236,7 +255,8 @@ } -void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg) { +void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg, + bool AllowUndef) { assert(Use.isValid() && "Invalid SlotIndex"); assert(Indexes && "Missing SlotIndexes"); assert(DomTree && "Missing dominator tree"); @@ -252,7 +272,7 @@ // multiple values, and we may need to create even more phi-defs to preserve // VNInfo SSA form. Perform a search for all predecessor blocks where we // know the dominating VNInfo. - if (findReachingDefs(LR, *UseMBB, Use, PhysReg)) + if (findReachingDefs(LR, *UseMBB, Use, PhysReg, AllowUndef)) return; // When there were multiple different values, we may need new PHIs. @@ -271,8 +291,47 @@ } +bool LiveRangeCalc::isDefOnEntry(LiveRange &LR, MachineBasicBlock &MBB) { + if (DefOnEntry.test(MBB.getNumber())) + return true; + + SetVector WorkList; + // Checking if the entry of MBB is reached by some def: add all predecessors + // to the work list. + for (MachineBasicBlock *P : MBB.predecessors()) + WorkList.insert(P->getNumber()); + + for (unsigned i = 0; i != WorkList.size(); ++i) { + // Determine if the exit from the block is reached by some def. + unsigned N = WorkList[i]; + // If a block is defined on entry, it is also defined on exit. + if (DefOnEntry.test(N)) + return true; + // Check if BB#N contains a definition. + MachineBasicBlock &B = *MF->getBlockNumbered(WorkList[i]); + SlotIndex Begin, End; + std::tie(Begin, End) = Indexes->getMBBRange(&B); + for (LiveRange::Segment &Seg : LR) { + if (Seg.end <= Begin || Seg.start >= End) + continue; + // There is a def in the block. + for (MachineBasicBlock *S : B.successors()) + DefOnEntry.set(S->getNumber()); + DefOnEntry.set(MBB.getNumber()); + return true; + } + // Still don't know: add all predecessors to the work list. + for (MachineBasicBlock *P : B.predecessors()) + WorkList.insert(P->getNumber()); + } + + return false; +} + + bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, - SlotIndex Use, unsigned PhysReg) { + SlotIndex Use, unsigned PhysReg, + bool AllowUndef) { unsigned UseMBBNum = UseMBB.getNumber(); // Block numbers where LR should be live-in. @@ -282,12 +341,14 @@ bool UniqueVNI = true; VNInfo *TheVNI = nullptr; + bool FoundUndef = false; + // Using Seen as a visited set, perform a BFS for all reaching defs. for (unsigned i = 0; i != WorkList.size(); ++i) { MachineBasicBlock *MBB = MF->getBlockNumbered(WorkList[i]); #ifndef NDEBUG - if (MBB->pred_empty()) { + if (!AllowUndef && MBB->pred_empty()) { MBB->getParent()->verify(); errs() << "Use of " << PrintReg(PhysReg) << " does not have a corresponding definition on every path:\n"; @@ -306,6 +367,7 @@ llvm_unreachable("Invalid global physical register"); } #endif + FoundUndef |= MBB->pred_empty(); for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { @@ -337,7 +399,7 @@ // No, we need a live-in value for Pred as well if (Pred != &UseMBB) - WorkList.push_back(Pred->getNumber()); + WorkList.push_back(Pred->getNumber()); else // Loopback to UseMBB, so value is really live through. Use = SlotIndex(); @@ -346,6 +408,9 @@ LiveIn.clear(); + if (AllowUndef && FoundUndef) + UniqueVNI = false; + // Both updateSSA() and LiveRangeUpdater benefit from ordered blocks, but // neither require it. Skip the sorting overhead for small updates. if (WorkList.size() > 4) @@ -354,16 +419,15 @@ // If a unique reaching def was found, blit in the live ranges immediately. if (UniqueVNI) { LiveRangeUpdater Updater(&LR); - for (SmallVectorImpl::const_iterator I = WorkList.begin(), - E = WorkList.end(); I != E; ++I) { - SlotIndex Start, End; - std::tie(Start, End) = Indexes->getMBBRange(*I); - // Trim the live range in UseMBB. - if (*I == UseMBBNum && Use.isValid()) - End = Use; - else - Map[MF->getBlockNumbered(*I)] = LiveOutPair(TheVNI, nullptr); - Updater.add(Start, End, TheVNI); + for (unsigned BN : WorkList) { + SlotIndex Start, End; + std::tie(Start, End) = Indexes->getMBBRange(BN); + // Trim the live range in UseMBB. + if (BN == UseMBBNum && Use.isValid()) + End = Use; + else + Map[MF->getBlockNumbered(BN)] = LiveOutPair(TheVNI, nullptr); + Updater.add(Start, End, TheVNI); } return true; } @@ -371,9 +435,10 @@ // Multiple values were found, so transfer the work list to the LiveIn array // where UpdateSSA will use it as a work list. LiveIn.reserve(WorkList.size()); - for (SmallVectorImpl::const_iterator - I = WorkList.begin(), E = WorkList.end(); I != E; ++I) { - MachineBasicBlock *MBB = MF->getBlockNumbered(*I); + for (unsigned BN : WorkList) { + MachineBasicBlock *MBB = MF->getBlockNumbered(BN); + if (AllowUndef && !isDefOnEntry(LR, *MBB)) + continue; addLiveInBlock(LR, DomTree->getNode(MBB)); if (MBB == &UseMBB) LiveIn.back().Kill = Use; @@ -458,10 +523,12 @@ I.DomNode = nullptr; // Add liveness since updateFromLiveIns now skips this node. - if (I.Kill.isValid()) - LR.addSegment(LiveInterval::Segment(Start, I.Kill, VNI)); - else { - LR.addSegment(LiveInterval::Segment(Start, End, VNI)); + if (I.Kill.isValid()) { + if (VNI) + LR.addSegment(LiveInterval::Segment(Start, I.Kill, VNI)); + } else { + if (VNI) + LR.addSegment(LiveInterval::Segment(Start, End, VNI)); LOP = LiveOutPair(VNI, Node); } } else if (IDomValue.first) { Index: lib/CodeGen/LiveRangeEdit.cpp =================================================================== --- lib/CodeGen/LiveRangeEdit.cpp +++ lib/CodeGen/LiveRangeEdit.cpp @@ -37,6 +37,13 @@ VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); } LiveInterval &LI = LIS.createEmptyInterval(VReg); + // Create empty subranges if the OldReg's interval has them. Do not create + // the main range here---it will be constructed later after the subranges + // have been finalized. + LiveInterval &OldLI = LIS.getInterval(OldReg); + VNInfo::Allocator &Alloc = LIS.getVNInfoAllocator(); + for (LiveInterval::SubRange &S : OldLI.subranges()) + LI.createSubRange(Alloc, S.LaneMask); return LI; } @@ -66,6 +73,8 @@ unsigned Original = VRM->getOriginal(getReg()); LiveInterval &OrigLI = LIS.getInterval(Original); VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def); + if (!OrigVNI) + continue; MachineInstr *DefMI = LIS.getInstructionFromIndex(OrigVNI->def); if (!DefMI) continue; @@ -333,6 +342,7 @@ // after all the allocations of the func are done. if (isOrigDef) { LiveInterval &NewLI = createEmptyIntervalFrom(Dest); + NewLI.removeEmptySubRanges(); VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI)); pop_back(); Index: lib/CodeGen/MachineVerifier.cpp =================================================================== --- lib/CodeGen/MachineVerifier.cpp +++ lib/CodeGen/MachineVerifier.cpp @@ -1773,18 +1773,25 @@ bool hasRead = false; bool hasSubRegDef = false; bool hasDeadDef = false; + LaneBitmask RLM = MRI->getMaxLaneMaskForVReg(Reg); for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) { if (!MOI->isReg() || MOI->getReg() != Reg) continue; - if (LaneMask != 0 && - (LaneMask & TRI->getSubRegIndexLaneMask(MOI->getSubReg())) == 0) - continue; + unsigned Sub = MOI->getSubReg(); + LaneBitmask SLM = Sub != 0 ? TRI->getSubRegIndexLaneMask(Sub) : RLM; if (MOI->isDef()) { - if (MOI->getSubReg() != 0) + if (Sub != 0) { hasSubRegDef = true; + // An operand vreg0:sub0 reads vreg0:sub1..n. Invert the lane + // mask for subregister defs. Read-undef defs will be handled by + // readsReg below. + SLM = ~SLM & RLM; + } if (MOI->isDead()) hasDeadDef = true; } + if (LaneMask != 0 && !(LaneMask & SLM)) + continue; if (MOI->readsReg()) hasRead = true; } Index: lib/CodeGen/SplitKit.h =================================================================== --- lib/CodeGen/SplitKit.h +++ lib/CodeGen/SplitKit.h @@ -324,12 +324,30 @@ return LRCalc[SpillMode != SM_Partition && RegIdx != 0]; } + /// Find a subrange corresponding to the lane mask LM in live interval LI. + /// The interval LI is assumed to contain such a subrange. This function + /// is used to find corresponding subranges between the original interval + /// and the new intervals. + LiveInterval::SubRange &getSubRangeForMask(LaneBitmask LM, LiveInterval &LI); + + /// Add a segment to the interval LI for the value number VNI. If LI has + /// subranges, corresponding segments will be added to them as well, but + /// with newly created value numbers. If Original is true, dead def will + /// only be added a subrange of LI if the corresponding subrange of the + /// original interval has a def at this index. Otherwise, all subranges + /// of LI will be updated. + void addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original); + /// defValue - define a value in RegIdx from ParentVNI at Idx. /// Idx does not have to be ParentVNI->def, but it must be contained within /// ParentVNI's live range in ParentLI. The new value is added to the value - /// map. + /// map. The value being defined may either come from rematerialization + /// (or an inserted copy), or it may be coming from the original interval. + /// The parameter Original should be true in the latter case, otherwise + /// it should be false. /// Return the new LI value. - VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx); + VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx, + bool Original); /// forceRecompute - Force the live range of ParentVNI in RegIdx to be /// recomputed by LiveRangeCalc::extend regardless of the number of defs. Index: lib/CodeGen/SplitKit.cpp =================================================================== --- lib/CodeGen/SplitKit.cpp +++ lib/CodeGen/SplitKit.cpp @@ -379,9 +379,47 @@ } #endif +LiveInterval::SubRange &SplitEditor::getSubRangeForMask(LaneBitmask LM, + LiveInterval &LI) { + for (LiveInterval::SubRange &S : LI.subranges()) + if (S.LaneMask == LM) + return S; + llvm_unreachable("SubRange for this mask not found"); +}; + +void SplitEditor::addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original) { + SlotIndex Def = VNI->def; + if (!LI.hasSubRanges()) { + LI.addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI)); + return; + } + + if (Original) { + // If we are transferring a def from the original interval, make sure + // to only update the subranges for which the original subranges had + // a def at this location. + for (LiveInterval::SubRange &S : LI.subranges()) { + auto &PS = getSubRangeForMask(S.LaneMask, Edit->getParent()); + VNInfo *PV = PS.getVNInfoAt(Def); + if (PV != nullptr && PV->def == Def) { + VNInfo *V = S.getNextValue(Def, LIS.getVNInfoAllocator()); + S.addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), V)); + } + } + } else { + // This is a new def: either from rematerialization, of from an inserted + // copy. Since it will define the whole register, update all subranges. + for (LiveInterval::SubRange &S : LI.subranges()) { + VNInfo *V = S.getNextValue(Def, LIS.getVNInfoAllocator()); + S.addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), V)); + } + } +} + VNInfo *SplitEditor::defValue(unsigned RegIdx, const VNInfo *ParentVNI, - SlotIndex Idx) { + SlotIndex Idx, + bool Original) { assert(ParentVNI && "Mapping NULL value"); assert(Idx.isValid() && "Invalid SlotIndex"); assert(Edit->getParent().getVNInfoAt(Idx) == ParentVNI && "Bad Parent VNI"); @@ -390,28 +428,28 @@ // Create a new value. VNInfo *VNI = LI->getNextValue(Idx, LIS.getVNInfoAllocator()); + bool Force = LI->hasSubRanges(); + ValueForcePair FP(Force ? nullptr : VNI, Force); // Use insert for lookup, so we can add missing values with a second lookup. std::pair InsP = - Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id), - ValueForcePair(VNI, false))); + Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id), FP)); - // This was the first time (RegIdx, ParentVNI) was mapped. - // Keep it as a simple def without any liveness. - if (InsP.second) + // This was the first time (RegIdx, ParentVNI) was mapped, and it is not + // forced. Keep it as a simple def without any liveness. + if (!Force && InsP.second) return VNI; // If the previous value was a simple mapping, add liveness for it now. if (VNInfo *OldVNI = InsP.first->second.getPointer()) { - SlotIndex Def = OldVNI->def; - LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), OldVNI)); - // No longer a simple mapping. Switch to a complex, non-forced mapping. - InsP.first->second = ValueForcePair(); + addDeadDef(*LI, OldVNI, Original); + + // No longer a simple mapping. Switch to a complex mapping. If the + // interval has subranges, make it a forced mapping. + InsP.first->second = ValueForcePair(nullptr, Force); } // This is a complex mapping, add liveness for VNI - SlotIndex Def = VNI->def; - LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI)); - + addDeadDef(*LI, VNI, Original); return VNI; } @@ -429,9 +467,8 @@ // This was previously a single mapping. Make sure the old def is represented // by a trivial live range. - SlotIndex Def = VNI->def; - LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); - LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI)); + addDeadDef(LIS.getInterval(Edit->get(RegIdx)), VNI, false); + // Mark as complex mapped, forced. VFP = ValueForcePair(nullptr, true); } @@ -453,13 +490,18 @@ unsigned Original = VRM.getOriginal(Edit->get(RegIdx)); LiveInterval &OrigLI = LIS.getInterval(Original); VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx); - LiveRangeEdit::Remat RM(ParentVNI); - RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def); - if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) { - Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late); - ++NumRemats; - } else { + bool DidRemat = false; + if (OrigVNI) { + LiveRangeEdit::Remat RM(ParentVNI); + RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def); + if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) { + Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late); + ++NumRemats; + DidRemat = true; + } + } + if (!DidRemat) { // Can't remat, just insert a copy from parent. CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg) .addReg(Edit->getReg()); @@ -470,7 +512,7 @@ } // Define the value in Reg. - return defValue(RegIdx, ParentVNI, Def); + return defValue(RegIdx, ParentVNI, Def, false); } /// Create a new virtual register and live interval. @@ -942,14 +984,15 @@ } // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI. - DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx); - LiveRange &LR = LIS.getInterval(Edit->get(RegIdx)); + DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx + << '(' << PrintReg(Edit->get(RegIdx)) << ')'); + LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx)); // Check for a simply defined value that can be blitted directly. ValueForcePair VFP = Values.lookup(std::make_pair(RegIdx, ParentVNI->id)); if (VNInfo *VNI = VFP.getPointer()) { DEBUG(dbgs() << ':' << VNI->id); - LR.addSegment(LiveInterval::Segment(Start, End, VNI)); + LI.addSegment(LiveInterval::Segment(Start, End, VNI)); Start = End; continue; } @@ -973,7 +1016,7 @@ // The first block may be live-in, or it may have its own def. if (Start != BlockStart) { - VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End)); + VNInfo *VNI = LI.extendInBlock(BlockStart, std::min(BlockEnd, End)); assert(VNI && "Missing def for complex mapped value"); DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber()); // MBB has its own def. Is it also live-out? @@ -993,7 +1036,7 @@ if (BlockStart == ParentVNI->def) { // This block has the def of a parent PHI, so it isn't live-in. assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?"); - VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End)); + VNInfo *VNI = LI.extendInBlock(BlockStart, std::min(BlockEnd, End)); assert(VNI && "Missing def for complex mapped parent PHI"); if (End >= BlockEnd) LRC.setLiveOutValue(&*MBB, VNI); // Live-out as well. @@ -1001,10 +1044,10 @@ // This block needs a live-in value. The last block covered may not // be live-out. if (End < BlockEnd) - LRC.addLiveInBlock(LR, MDT[&*MBB], End); + LRC.addLiveInBlock(LI, MDT[&*MBB], End); else { // Live-through, and we don't know the value. - LRC.addLiveInBlock(LR, MDT[&*MBB]); + LRC.addLiveInBlock(LI, MDT[&*MBB]); LRC.setLiveOutValue(&*MBB, nullptr); } } @@ -1025,40 +1068,76 @@ void SplitEditor::extendPHIKillRanges() { // Extend live ranges to be live-out for successor PHI values. - for (const VNInfo *PHIVNI : Edit->getParent().valnos) { - if (PHIVNI->isUnused() || !PHIVNI->isPHIDef()) - continue; - unsigned RegIdx = RegAssign.lookup(PHIVNI->def); - LiveRange &LR = LIS.getInterval(Edit->get(RegIdx)); - - // Check whether PHI is dead. - const LiveRange::Segment *Segment = LR.getSegmentContaining(PHIVNI->def); - assert(Segment != nullptr && "Missing segment for VNI"); - if (Segment->end == PHIVNI->def.getDeadSlot()) { - // This is a dead PHI. Remove it. - LR.removeSegment(*Segment, true); - continue; - } + auto RemoveDeadSegment = [] (SlotIndex Def, LiveRange &LR) -> bool { + const LiveRange::Segment *Seg = LR.getSegmentContaining(Def); + assert(Seg != nullptr && "Missing segment for VNI"); + if (Seg->end != Def.getDeadSlot()) + return false; + // This is a dead PHI. Remove it. + LR.removeSegment(*Seg, true); + return true; + }; - LiveRangeCalc &LRC = getLRCalc(RegIdx); - MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def); - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - SlotIndex End = LIS.getMBBEndIdx(*PI); + auto ExtendPHIRange = [this] (MachineBasicBlock &B, LiveRangeCalc &LRC, + LiveRange &LR, bool AllowUndef) -> void { + for (MachineBasicBlock *P : B.predecessors()) { + SlotIndex End = LIS.getMBBEndIdx(P); SlotIndex LastUse = End.getPrevSlot(); // The predecessor may not have a live-out value. That is OK, like an // undef PHI operand. - if (Edit->getParent().liveAt(LastUse)) { - assert(RegAssign.lookup(LastUse) == RegIdx && - "Different register assignment in phi predecessor"); - LRC.extend(LR, End); - } + if (Edit->getParent().liveAt(LastUse)) + LRC.extend(LR, End, 0, AllowUndef); + } + }; + + // Visit each PHI def slot in the parent live interval. If the def is dead, + // remove it. Otherwise, extend the live interval to reach the end indexes + // of all predecessor blocks. + + LiveInterval &ParentLI = Edit->getParent(); + for (const VNInfo *V : ParentLI.valnos) { + if (V->isUnused() || !V->isPHIDef()) + continue; + + unsigned RegIdx = RegAssign.lookup(V->def); + LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx)); + LiveRangeCalc &LRC = getLRCalc(RegIdx); + MachineBasicBlock &B = *LIS.getMBBFromIndex(V->def); + if (!RemoveDeadSegment(V->def, LI)) + ExtendPHIRange(B, LRC, LI, false); + } + + LiveRangeCalc SubLRC; + + for (LiveInterval::SubRange &PS : ParentLI.subranges()) { + for (const VNInfo *V : PS.valnos) { + if (V->isUnused() || !V->isPHIDef()) + continue; + unsigned RegIdx = RegAssign.lookup(V->def); + LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx)); + LiveInterval::SubRange &S = getSubRangeForMask(PS.LaneMask, LI); + if (RemoveDeadSegment(V->def, S)) + continue; + + MachineBasicBlock &B = *LIS.getMBBFromIndex(V->def); + SubLRC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, + &LIS.getVNInfoAllocator()); + ExtendPHIRange(B, SubLRC, S, true); } } } /// rewriteAssigned - Rewrite all uses of Edit->getReg(). void SplitEditor::rewriteAssigned(bool ExtendRanges) { + auto liveSub = [this] (SlotIndex Idx, LaneBitmask LM) -> bool { + for (LiveInterval::SubRange &S : Edit->getParent().subranges()) + if ((S.LaneMask & LM) != 0 && S.liveAt(Idx.getPrevSlot())) + return true; + return false; + }; + + SetVector RecalcMain; + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()), RE = MRI.reg_end(); RI != RE;) { MachineOperand &MO = *RI; @@ -1078,22 +1157,29 @@ if (MO.isDef() || MO.isUndef()) Idx = Idx.getRegSlot(MO.isEarlyClobber()); + LaneBitmask LM = MRI.getMaxLaneMaskForVReg(MO.getReg()); + // Rewrite to the mapped register at Idx. unsigned RegIdx = RegAssign.lookup(Idx); - LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); - MO.setReg(LI->reg); + LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx)); + MO.setReg(LI.reg); + if (LI.hasSubRanges()) + RecalcMain.insert(LI.reg); DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t' << Idx << ':' << RegIdx << '\t' << *MI); // Extend liveness to Idx if the instruction reads reg. - if (!ExtendRanges || MO.isUndef()) + if (!ExtendRanges) continue; // Skip instructions that don't read Reg. if (MO.isDef()) { - if (!MO.getSubReg() && !MO.isEarlyClobber()) + unsigned Sub = MO.getSubReg(); + if (Sub == 0 && !MO.isEarlyClobber()) continue; - // We may wan't to extend a live range for a partial redef, or for a use + if (Sub != 0 && !liveSub(Idx, LM & ~TRI.getSubRegIndexLaneMask(Sub))) + continue; + // We may want to extend a live range for a partial redef, or for a use // tied to an early clobber. Idx = Idx.getPrevSlot(); if (!Edit->getParent().liveAt(Idx)) @@ -1101,7 +1187,42 @@ } else Idx = Idx.getRegSlot(true); - getLRCalc(RegIdx).extend(*LI, Idx.getNextSlot()); + SlotIndex Next = Idx.getNextSlot(); + + if (LI.hasSubRanges()) { + LiveRangeCalc SubLRC; + unsigned Reg = MO.getReg(), Sub = MO.getSubReg(); + LaneBitmask LM = Sub != 0 ? TRI.getSubRegIndexLaneMask(Sub) + : MRI.getMaxLaneMaskForVReg(Reg); + // If this is a definition of a sub-register, extend subranges for + // everything except that sub-register. + if (Sub != 0 && MO.isDef()) + LM = MRI.getMaxLaneMaskForVReg(Reg) & ~LM; + for (LiveInterval::SubRange &S : LI.subranges()) { + if (!(S.LaneMask & LM)) + continue; + // The problem here can be that the new register may have been created + // for a partially defined original register. For example: + // %vreg827:subreg_hireg = ... + // ... + // %vreg828 = COPY %vreg827 + // Do not try to extend subranges that do not have any prior defs. + if (S.empty()) + continue; + SubLRC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, + &LIS.getVNInfoAllocator()); + SubLRC.extend(S, Next, 0, true); + } + } else { + LiveRangeCalc &LRC = getLRCalc(RegIdx); + LRC.extend(LI, Next); + } + } + + for (unsigned R : RecalcMain) { + LiveInterval &LI = LIS.getInterval(R); + LI.clear(); + LIS.constructMainRangeFromSubranges(LI); } } @@ -1144,7 +1265,7 @@ if (ParentVNI->isUnused()) continue; unsigned RegIdx = RegAssign.lookup(ParentVNI->def); - defValue(RegIdx, ParentVNI, ParentVNI->def); + defValue(RegIdx, ParentVNI, ParentVNI->def, true); // Force rematted values to be recomputed everywhere. // The new live ranges may be truncated. @@ -1180,8 +1301,9 @@ deleteRematVictims(); // Get rid of unused values and set phi-kill flags. - for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) { - LiveInterval &LI = LIS.getInterval(*I); + for (unsigned Reg : *Edit) { + LiveInterval &LI = LIS.getInterval(Reg); + LI.removeEmptySubRanges(); LI.RenumberValues(); } Index: lib/CodeGen/VirtRegMap.cpp =================================================================== --- lib/CodeGen/VirtRegMap.cpp +++ lib/CodeGen/VirtRegMap.cpp @@ -337,6 +337,7 @@ assert(LI.liveAt(BaseIndex) && "Reads of completely dead register should be marked undef already"); unsigned SubRegIdx = MO.getSubReg(); + assert(SubRegIdx != 0 && LI.hasSubRanges()); LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(SubRegIdx); // See if any of the relevant subregister liveranges is defined at this point. for (const LiveInterval::SubRange &SR : LI.subranges()) { Index: test/CodeGen/Hexagon/regalloc-bad-undef.mir =================================================================== --- /dev/null +++ test/CodeGen/Hexagon/regalloc-bad-undef.mir @@ -0,0 +1,209 @@ +# RUN: llc -march=hexagon -hexagon-subreg-liveness -start-after machine-scheduler -stop-after stack-slot-coloring %s 2>&1 -o /dev/null | FileCheck %s + +--- | + target triple = "hexagon" + + ; Function Attrs: nounwind optsize + define void @main() #0 { + entry: + br label %for.body + + for.body: ; preds = %if.end82, %entry + %lsr.iv = phi i32 [ %lsr.iv.next, %if.end82 ], [ 524288, %entry ] + %call9 = tail call i32 @lrand48() #0 + %conv10 = sext i32 %call9 to i64 + %shr11 = lshr i64 %conv10, 9 + %or12 = or i64 0, %shr11 + %call14 = tail call i32 @lrand48() #0 + %conv15138 = zext i32 %call14 to i64 + %shr16 = lshr i64 %conv15138, 9 + %0 = call i64 @llvm.hexagon.S2.extractup(i64 %conv15138, i32 22, i32 9) + %1 = shl i64 %0, 42 + %shl17 = shl i64 %shr16, 42 + %or22 = or i64 0, %1 + %or26 = or i64 %or22, 0 + %shr30 = lshr i64 undef, 25 + %2 = call i64 @llvm.hexagon.S2.extractup(i64 undef, i32 6, i32 25) + %and = and i64 %shr30, 63 + %sub = shl i64 2, %2 + %add = add i64 %sub, -1 + %shl37 = shl i64 %add, 0 + %call38 = tail call i32 @lrand48() #0 + %conv39141 = zext i32 %call38 to i64 + %shr40 = lshr i64 %conv39141, 25 + %3 = call i64 @llvm.hexagon.S2.extractup(i64 %conv39141, i32 6, i32 25) + %and41 = and i64 %shr40, 63 + %sub43 = shl i64 2, %3 + %add45 = add i64 %sub43, -1 + %call46 = tail call i32 @lrand48() #0 + %shr48 = lshr i64 undef, 25 + %4 = call i64 @llvm.hexagon.S2.extractup(i64 undef, i32 6, i32 25) + %and49 = and i64 %shr48, 63 + %shl50 = shl i64 %add45, %4 + %and52 = and i64 %shl37, %or12 + %and54 = and i64 %shl50, %or26 + store i64 %and54, i64* undef, align 8 + %cmp56 = icmp eq i64 %and52, 0 + br i1 %cmp56, label %for.end, label %if.end82 + + if.end82: ; preds = %for.body + %lsr.iv.next = add nsw i32 %lsr.iv, -1 + %exitcond = icmp eq i32 %lsr.iv.next, 0 + br i1 %exitcond, label %for.end, label %for.body + + for.end: ; preds = %if.end82, %for.body + unreachable + } + + declare i32 @lrand48() #0 + declare i64 @llvm.hexagon.S2.extractup(i64, i32, i32) #1 + + attributes #0 = { nounwind optsize "target-cpu"="hexagonv55" "target-features"="-hvx,-hvx-double" } + attributes #1 = { nounwind readnone } + +... +--- +name: main +alignment: 2 +exposesReturnsTwice: false +hasInlineAsm: false +allVRegsAllocated: false +isSSA: false +tracksRegLiveness: true +tracksSubRegLiveness: true +registers: + - { id: 0, class: intregs } + - { id: 1, class: intregs } + - { id: 2, class: intregs } + - { id: 3, class: intregs } + - { id: 4, class: doubleregs } + - { id: 5, class: intregs } + - { id: 6, class: doubleregs } + - { id: 7, class: doubleregs } + - { id: 8, class: doubleregs } + - { id: 9, class: doubleregs } + - { id: 10, class: intregs } + - { id: 11, class: doubleregs } + - { id: 12, class: doubleregs } + - { id: 13, class: doubleregs } + - { id: 14, class: intregs } + - { id: 15, class: doubleregs } + - { id: 16, class: doubleregs } + - { id: 17, class: intregs } + - { id: 18, class: doubleregs } + - { id: 19, class: intregs } + - { id: 20, class: doubleregs } + - { id: 21, class: doubleregs } + - { id: 22, class: doubleregs } + - { id: 23, class: intregs } + - { id: 24, class: doubleregs } + - { id: 25, class: predregs } + - { id: 26, class: predregs } + - { id: 27, class: intregs } + - { id: 28, class: intregs } + - { id: 29, class: doubleregs } + - { id: 30, class: intregs } + - { id: 31, class: intregs } + - { id: 32, class: doubleregs } + - { id: 33, class: intregs } + - { id: 34, class: intregs } + - { id: 35, class: doubleregs } + - { id: 36, class: doubleregs } + - { id: 37, class: intregs } + - { id: 38, class: intregs } + - { id: 39, class: doubleregs } + - { id: 40, class: doubleregs } + - { id: 41, class: intregs } + - { id: 42, class: intregs } + - { id: 43, class: doubleregs } + - { id: 44, class: intregs } + - { id: 45, class: intregs } + - { id: 46, class: doubleregs } + - { id: 47, class: doubleregs } + - { id: 48, class: doubleregs } + - { id: 49, class: doubleregs } + - { id: 50, class: doubleregs } + - { id: 51, class: doubleregs } + - { id: 52, class: intregs } + - { id: 53, class: intregs } + - { id: 54, class: intregs } + - { id: 55, class: doubleregs } + - { id: 56, class: doubleregs } + - { id: 57, class: intregs } + - { id: 58, class: intregs } + - { id: 59, class: intregs } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: true + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + successors: %bb.1.for.body + + %59 = A2_tfrsi 524288 + undef %32:subreg_hireg = A2_tfrsi 0 + %8 = S2_extractup undef %9, 6, 25 + %47 = A2_tfrpi 2 + %13 = A2_tfrpi -1 + %13 = S2_asl_r_p_acc %13, %47, %8:subreg_loreg + %51 = A2_tfrpi 0 + + ; CHECK: %d2 = S2_extractup undef %d0, 6, 25 + ; CHECK: %d0 = A2_tfrpi 2 + ; CHECK: %d13 = A2_tfrpi -1 + ; CHECK-NOT: undef %r4 + + bb.1.for.body: + successors: %bb.3.for.end, %bb.2.if.end82 + + ADJCALLSTACKDOWN 0, implicit-def dead %r29, implicit-def dead %r30, implicit %r31, implicit %r30, implicit %r29 + J2_call @lrand48, implicit-def dead %d0, implicit-def dead %d1, implicit-def dead %d2, implicit-def dead %d3, implicit-def dead %d4, implicit-def dead %d5, implicit-def dead %d6, implicit-def dead %d7, implicit-def dead %r28, implicit-def dead %r31, implicit-def dead %p0, implicit-def dead %p1, implicit-def dead %p2, implicit-def dead %p3, implicit-def dead %m0, implicit-def dead %m1, implicit-def dead %lc0, implicit-def dead %lc1, implicit-def dead %sa0, implicit-def dead %sa1, implicit-def dead %usr, implicit-def %usr_ovf, implicit-def dead %cs0, implicit-def dead %cs1, implicit-def dead %w0, implicit-def dead %w1, implicit-def dead %w2, implicit-def dead %w3, implicit-def dead %w4, implicit-def dead %w5, implicit-def dead %w6, implicit-def dead %w7, implicit-def dead %w8, implicit-def dead %w9, implicit-def dead %w10, implicit-def dead %w11, implicit-def dead %w12, implicit-def dead %w13, implicit-def dead %w14, implicit-def dead %w15, implicit-def dead %q0, implicit-def dead %q1, implicit-def dead %q2, implicit-def dead %q3, implicit-def %r0 + ADJCALLSTACKUP 0, 0, implicit-def dead %r29, implicit-def dead %r30, implicit-def dead %r31, implicit %r29 + undef %29:subreg_loreg = COPY killed %r0 + %29:subreg_hireg = S2_asr_i_r %29:subreg_loreg, 31 + ADJCALLSTACKDOWN 0, implicit-def dead %r29, implicit-def dead %r30, implicit %r31, implicit %r30, implicit %r29 + J2_call @lrand48, implicit-def dead %d0, implicit-def dead %d1, implicit-def dead %d2, implicit-def dead %d3, implicit-def dead %d4, implicit-def dead %d5, implicit-def dead %d6, implicit-def dead %d7, implicit-def dead %r28, implicit-def dead %r31, implicit-def dead %p0, implicit-def dead %p1, implicit-def dead %p2, implicit-def dead %p3, implicit-def dead %m0, implicit-def dead %m1, implicit-def dead %lc0, implicit-def dead %lc1, implicit-def dead %sa0, implicit-def dead %sa1, implicit-def dead %usr, implicit-def %usr_ovf, implicit-def dead %cs0, implicit-def dead %cs1, implicit-def dead %w0, implicit-def dead %w1, implicit-def dead %w2, implicit-def dead %w3, implicit-def dead %w4, implicit-def dead %w5, implicit-def dead %w6, implicit-def dead %w7, implicit-def dead %w8, implicit-def dead %w9, implicit-def dead %w10, implicit-def dead %w11, implicit-def dead %w12, implicit-def dead %w13, implicit-def dead %w14, implicit-def dead %w15, implicit-def dead %q0, implicit-def dead %q1, implicit-def dead %q2, implicit-def dead %q3, implicit-def %r0 + ADJCALLSTACKUP 0, 0, implicit-def dead %r29, implicit-def dead %r30, implicit-def dead %r31, implicit %r29 + %32:subreg_loreg = COPY killed %r0 + %7 = S2_extractup %32, 22, 9 + ADJCALLSTACKDOWN 0, implicit-def dead %r29, implicit-def dead %r30, implicit %r31, implicit %r30, implicit %r29 + J2_call @lrand48, implicit-def dead %d0, implicit-def dead %d1, implicit-def dead %d2, implicit-def dead %d3, implicit-def dead %d4, implicit-def dead %d5, implicit-def dead %d6, implicit-def dead %d7, implicit-def dead %r28, implicit-def dead %r31, implicit-def dead %p0, implicit-def dead %p1, implicit-def dead %p2, implicit-def dead %p3, implicit-def dead %m0, implicit-def dead %m1, implicit-def dead %lc0, implicit-def dead %lc1, implicit-def dead %sa0, implicit-def dead %sa1, implicit-def dead %usr, implicit-def %usr_ovf, implicit-def dead %cs0, implicit-def dead %cs1, implicit-def dead %w0, implicit-def dead %w1, implicit-def dead %w2, implicit-def dead %w3, implicit-def dead %w4, implicit-def dead %w5, implicit-def dead %w6, implicit-def dead %w7, implicit-def dead %w8, implicit-def dead %w9, implicit-def dead %w10, implicit-def dead %w11, implicit-def dead %w12, implicit-def dead %w13, implicit-def dead %w14, implicit-def dead %w15, implicit-def dead %q0, implicit-def dead %q1, implicit-def dead %q2, implicit-def dead %q3, implicit-def %r0 + ADJCALLSTACKUP 0, 0, implicit-def dead %r29, implicit-def dead %r30, implicit-def dead %r31, implicit %r29 + undef %43:subreg_loreg = COPY killed %r0 + %43:subreg_hireg = COPY %32:subreg_hireg + %16 = S2_extractup %43, 6, 25 + %18 = A2_tfrpi -1 + %18 = S2_asl_r_p_acc %18, %47, %16:subreg_loreg + ADJCALLSTACKDOWN 0, implicit-def dead %r29, implicit-def dead %r30, implicit %r31, implicit %r30, implicit %r29 + J2_call @lrand48, implicit-def dead %d0, implicit-def dead %d1, implicit-def dead %d2, implicit-def dead %d3, implicit-def dead %d4, implicit-def dead %d5, implicit-def dead %d6, implicit-def dead %d7, implicit-def dead %r28, implicit-def dead %r31, implicit-def dead %p0, implicit-def dead %p1, implicit-def dead %p2, implicit-def dead %p3, implicit-def dead %m0, implicit-def dead %m1, implicit-def dead %lc0, implicit-def dead %lc1, implicit-def dead %sa0, implicit-def dead %sa1, implicit-def dead %usr, implicit-def %usr_ovf, implicit-def dead %cs0, implicit-def dead %cs1, implicit-def dead %w0, implicit-def dead %w1, implicit-def dead %w2, implicit-def dead %w3, implicit-def dead %w4, implicit-def dead %w5, implicit-def dead %w6, implicit-def dead %w7, implicit-def dead %w8, implicit-def dead %w9, implicit-def dead %w10, implicit-def dead %w11, implicit-def dead %w12, implicit-def dead %w13, implicit-def dead %w14, implicit-def dead %w15, implicit-def dead %q0, implicit-def dead %q1, implicit-def dead %q2, implicit-def dead %q3 + ADJCALLSTACKUP 0, 0, implicit-def dead %r29, implicit-def dead %r30, implicit-def dead %r31, implicit %r29 + %22 = S2_asl_r_p %18, %8:subreg_loreg + %21 = COPY %13 + %21 = S2_lsr_i_p_and %21, %29, 9 + %22 = S2_asl_i_p_and %22, %7, 42 + S2_storerd_io undef %23, 0, %22 :: (store 8 into `i64* undef`) + %25 = C2_cmpeqp %21, %51 + J2_jumpt %25, %bb.3.for.end, implicit-def dead %pc + J2_jump %bb.2.if.end82, implicit-def dead %pc + + bb.2.if.end82: + successors: %bb.3.for.end, %bb.1.for.body + + %59 = A2_addi %59, -1 + %26 = C2_cmpeqi %59, 0 + J2_jumpf %26, %bb.1.for.body, implicit-def dead %pc + J2_jump %bb.3.for.end, implicit-def dead %pc + + bb.3.for.end: + +...