Index: lib/CodeGen/SplitKit.h
===================================================================
--- lib/CodeGen/SplitKit.h
+++ lib/CodeGen/SplitKit.h
@@ -405,6 +405,16 @@
   /// deleteRematVictims - Delete defs that are dead after rematerializing.
   void deleteRematVictims();
 
+  /// Add a copy instruction copying \p FromReg to \p ToReg before
+  /// \p InsertBefore. This can be invoked with a \p LaneMask which may make it
+  /// necessary to construct a sequence of copies to cover it exactly.
+  /// \p Late is forwarded to SlotIndexes::insertMachineInstrInMaps(). Aborts
+  /// with a fatal error if the lanes cannot be covered with the available
+  /// subregister indexes.
+  /// \returns the register slot index of the last COPY that was inserted.
+  SlotIndex buildCopy(unsigned FromReg, unsigned ToReg, LaneBitmask LaneMask,
+                      MachineBasicBlock &MBB,
+                      MachineBasicBlock::iterator InsertBefore, bool Late);
+
 public:
   /// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
   /// Newly created intervals will be appended to newIntervals.
Index: lib/CodeGen/SplitKit.cpp
===================================================================
--- lib/CodeGen/SplitKit.cpp
+++ lib/CodeGen/SplitKit.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
@@ -487,12 +488,114 @@
   VFP = ValueForcePair(nullptr, true);
 }
 
+SlotIndex SplitEditor::buildCopy(unsigned FromReg, unsigned ToReg,
+    LaneBitmask LaneMask, MachineBasicBlock &MBB,
+    MachineBasicBlock::iterator InsertBefore,
+    bool Late) {
+  const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+  SlotIndexes &Indexes = *LIS.getSlotIndexes();
+  SlotIndex Def;
+  if (LaneMask.all() || LaneMask == MRI.getMaxLaneMaskForVReg(FromReg)) {
+    // The full vreg is copied.
+    MachineInstr *CopyMI =
+        BuildMI(MBB, InsertBefore, DebugLoc(), Desc, ToReg).addReg(FromReg);
+    Def = Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
+  } else {
+    // Only a subset of lanes needs to be copied. The following is a simple
+    // heuristic to construct a sequence of COPYs. We could add a target
+    // specific callback if this turns out to be suboptimal.
+
+    // First pass: Try to find a perfectly matching subregister index. If none
+    // exists find the one covering the most lanemask bits.
+    SmallVector<unsigned, 8> PossibleIndexes;
+    unsigned BestIdx = 0;
+    unsigned BestCover = 0;
+    const TargetRegisterClass *RC = MRI.getRegClass(FromReg);
+    assert(RC == MRI.getRegClass(ToReg) && "Should have same reg class");
+    for (unsigned Idx = 1, E = TRI.getNumSubRegIndices(); Idx < E; ++Idx) {
+      // Is this index even compatible with the given class?
+      if (TRI.getSubClassWithSubReg(RC, Idx) != RC)
+        continue;
+      LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(Idx);
+      // Early exit if we found a perfect match.
+      if (SubRegMask == LaneMask) {
+        BestIdx = Idx;
+        break;
+      }
+
+      // The index must not cover any lanes outside \p LaneMask.
+      if ((SubRegMask & ~LaneMask).any())
+        continue;
+
+      unsigned PopCount = countPopulation(SubRegMask.getAsInteger());
+      PossibleIndexes.push_back(Idx);
+      if (PopCount > BestCover) {
+        BestCover = PopCount;
+        BestIdx = Idx;
+      }
+    }
+
+    // Abort if we cannot possibly implement the COPY with the given indexes.
+    if (BestIdx == 0)
+      report_fatal_error("Impossible to implement partial COPY");
+
+    // Only this first partial COPY flags its def as undef (presumably because
+    // no lane of ToReg holds a value yet); the follow-up COPYs below do not.
+    MachineInstr *CopyMI = BuildMI(MBB, InsertBefore, DebugLoc(), Desc)
+        .addReg(ToReg, RegState::Define | RegState::Undef, BestIdx)
+        .addReg(FromReg, 0, BestIdx);
+    Def = Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
+
+    // Greedy heuristic: Keep iterating keeping the best covering subreg index
+    // each time.
+    // Start from the lanes that the first COPY (index BestIdx) did not cover.
+    LaneBitmask LanesLeft =
+        LaneMask & ~(TRI.getSubRegIndexLaneMask(BestIdx));
+    while (LanesLeft.any()) {
+      unsigned BestIdx = 0;
+      int BestCover = INT_MIN;
+      for (unsigned Idx : PossibleIndexes) {
+        LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(Idx);
+        // Early exit if we found a perfect match.
+        if (SubRegMask == LanesLeft) {
+          BestIdx = Idx;
+          break;
+        }
+
+        // Skip indexes that cover none of the remaining lanes: picking one
+        // would leave LanesLeft unchanged and the loop would never terminate.
+        if (!(SubRegMask & LanesLeft).any())
+          continue;
+
+        // Try to cover as much of the remaining lanes as possible but
+        // as few of the already covered lanes as possible.
+        int Cover = countPopulation((SubRegMask & LanesLeft).getAsInteger())
+          - countPopulation((SubRegMask & ~LanesLeft).getAsInteger());
+        if (Cover > BestCover) {
+          BestCover = Cover;
+          BestIdx = Idx;
+        }
+      }
+
+      if (BestIdx == 0)
+        report_fatal_error("Impossible to implement partial COPY");
+
+      MachineInstr *CopyMI = BuildMI(MBB, InsertBefore, DebugLoc(), Desc)
+          .addReg(ToReg, RegState::Define, BestIdx)
+          .addReg(FromReg, 0, BestIdx);
+      Def = Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
+      LanesLeft &= ~TRI.getSubRegIndexLaneMask(BestIdx);
+    }
+  }
+
+  return Def;
+}
+
 VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
                                    VNInfo *ParentVNI,
                                    SlotIndex UseIdx,
                                    MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I) {
-  MachineInstr *CopyMI = nullptr;
   SlotIndex Def;
   LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
 
@@ -506,43 +609,27 @@
   VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
 
   bool DidRemat = false;
+  unsigned Reg = LI->reg;
   if (OrigVNI) {
     LiveRangeEdit::Remat RM(ParentVNI);
     RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
     if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) {
-      Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);
+      Def = Edit->rematerializeAt(MBB, I, Reg, RM, TRI, Late);
       ++NumRemats;
       DidRemat = true;
     }
   }
   if (!DidRemat) {
-    // Can't remat, just insert a copy from parent.
- CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg) - .addReg(Edit->getReg()); - Def = LIS.getSlotIndexes() - ->insertMachineInstrInMaps(*CopyMI, Late) - .getRegSlot(); + LaneBitmask LaneMask; if (LI->hasSubRanges()) { - LaneBitmask LM = LaneBitmask::getNone(); + LaneMask = LaneBitmask::getNone(); for (LiveInterval::SubRange &S : LI->subranges()) - LM |= S.LaneMask; - - if (MRI.getMaxLaneMaskForVReg(LI->reg) != LM) { - // Find subreg for the lane mask. - unsigned SubIdx = 0; - for (unsigned I = 1, E = TRI.getNumSubRegIndices(); I < E; ++I) { - if (TRI.getSubRegIndexLaneMask(I) == LM) { - SubIdx = I; - break; - } - } - if (SubIdx == 0) - report_fatal_error("Cannot find subreg index to cover all alive lanes"); - CopyMI->getOperand(0).setSubReg(SubIdx); - CopyMI->getOperand(1).setSubReg(SubIdx); - CopyMI->getOperand(0).setIsUndef(true); - } + LaneMask |= S.LaneMask; + } else { + LaneMask = LaneBitmask::getAll(); } + + Def = buildCopy(Edit->getReg(), Reg, LaneMask, MBB, I, Late); ++NumCopies; }