Index: llvm/include/llvm/CodeGen/TargetRegisterInfo.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -356,6 +356,16 @@
     return SubRegIndexLaneMasks[SubIdx];
   }
 
+  /// Try to find a set of subregister indexes to cover \p LaneMask. Returns
+  /// the best matching index, or NoSubRegister if this is impossible. If
+  /// multiple subregister indexes are required to exactly cover, the
+  /// additional ones (beyond the returned index) are appended to \p Indexes.
+  /// On failure \p Indexes may hold a partial result and must be ignored.
+  unsigned getCoveringSubRegIndexes(MachineRegisterInfo &MRI,
+                                    const TargetRegisterClass *RC,
+                                    LaneBitmask LaneMask,
+                                    SmallVectorImpl<unsigned> &Indexes) const;
+
   /// The lane masks returned by getSubRegIndexLaneMask() above can only be
   /// used to determine if sub-registers overlap - they can't be used to
   /// determine if a set of sub-registers completely cover another
Index: llvm/lib/CodeGen/SplitKit.cpp
===================================================================
--- llvm/lib/CodeGen/SplitKit.cpp
+++ llvm/lib/CodeGen/SplitKit.cpp
@@ -557,33 +557,11 @@
 
   // First pass: Try to find a perfectly matching subregister index. If none
   // exists find the one covering the most lanemask bits.
-  SmallVector<unsigned, 8> PossibleIndexes;
-  unsigned BestIdx = 0;
-  unsigned BestCover = 0;
   const TargetRegisterClass *RC = MRI.getRegClass(FromReg);
   assert(RC == MRI.getRegClass(ToReg) && "Should have same reg class");
-  for (unsigned Idx = 1, E = TRI.getNumSubRegIndices(); Idx < E; ++Idx) {
-    // Is this index even compatible with the given class?
-    if (TRI.getSubClassWithSubReg(RC, Idx) != RC)
-      continue;
-    LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(Idx);
-    // Early exit if we found a perfect match.
-    if (SubRegMask == LaneMask) {
-      BestIdx = Idx;
-      break;
-    }
 
-    // The index must not cover any lanes outside \p LaneMask.
-    if ((SubRegMask & ~LaneMask).any())
-      continue;
-
-    unsigned PopCount = SubRegMask.getNumLanes();
-    PossibleIndexes.push_back(Idx);
-    if (PopCount > BestCover) {
-      BestCover = PopCount;
-      BestIdx = Idx;
-    }
-  }
+  SmallVector<unsigned, 8> Indexes;
+  unsigned BestIdx = TRI.getCoveringSubRegIndexes(MRI, RC, LaneMask, Indexes);
 
   // Abort if we cannot possibly implement the COPY with the given indexes.
   if (BestIdx == 0)
@@ -592,36 +570,9 @@
   SlotIndex Def = buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore,
                                         BestIdx, DestLI, Late, SlotIndex());
 
-  // Greedy heuristic: Keep iterating keeping the best covering subreg index
-  // each time.
-  LaneBitmask LanesLeft = LaneMask & ~(TRI.getSubRegIndexLaneMask(BestIdx));
-  while (LanesLeft.any()) {
-    unsigned BestIdx = 0;
-    int BestCover = std::numeric_limits<int>::min();
-    for (unsigned Idx : PossibleIndexes) {
-      LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(Idx);
-      // Early exit if we found a perfect match.
-      if (SubRegMask == LanesLeft) {
-        BestIdx = Idx;
-        break;
-      }
-
-      // Try to cover as much of the remaining lanes as possible but
-      // as few of the already covered lanes as possible.
-      int Cover = (SubRegMask & LanesLeft).getNumLanes() -
-                  (SubRegMask & ~LanesLeft).getNumLanes();
-      if (Cover > BestCover) {
-        BestCover = Cover;
-        BestIdx = Idx;
-      }
-    }
-
-    if (BestIdx == 0)
-      report_fatal_error("Impossible to implement partial COPY");
-
+  for (unsigned BestIdx : Indexes) {
     buildSingleSubRegCopy(FromReg, ToReg, MBB, InsertBefore, BestIdx,
                           DestLI, Late, Def);
-    LanesLeft &= ~TRI.getSubRegIndexLaneMask(BestIdx);
   }
 
   return Def;
Index: llvm/lib/CodeGen/TargetRegisterInfo.cpp
===================================================================
--- llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -509,6 +509,84 @@
   return getRegSizeInBits(*RC);
 }
 
+unsigned TargetRegisterInfo::getCoveringSubRegIndexes(
+    MachineRegisterInfo &MRI, const TargetRegisterClass *RC,
+    LaneBitmask LaneMask, SmallVectorImpl<unsigned> &NeededIndexes) const {
+  // Candidate indexes for the greedy pass: valid for RC and confined to
+  // LaneMask.
+  SmallVector<unsigned, 8> PossibleIndexes;
+  unsigned BestIdx = 0;
+  unsigned BestCover = 0;
+
+  // First pass: Try to find a perfectly matching subregister index. If none
+  // exists find the one covering the most lanemask bits.
+  for (unsigned Idx = 1, E = getNumSubRegIndices(); Idx < E; ++Idx) {
+    // Is this index even compatible with the given class?
+    if (getSubClassWithSubReg(RC, Idx) != RC)
+      continue;
+    LaneBitmask SubRegMask = getSubRegIndexLaneMask(Idx);
+    // Early exit if we found a perfect match.
+    if (SubRegMask == LaneMask) {
+      BestIdx = Idx;
+      break;
+    }
+
+    // The index must not cover any lanes outside \p LaneMask.
+    if ((SubRegMask & ~LaneMask).any())
+      continue;
+
+    unsigned PopCount = SubRegMask.getNumLanes();
+    PossibleIndexes.push_back(Idx);
+    if (PopCount > BestCover) {
+      BestCover = PopCount;
+      BestIdx = Idx;
+    }
+  }
+
+  // Abort if no usable subregister index exists for this class and mask.
+  if (BestIdx == 0)
+    return 0;
+
+  // Greedy heuristic: Keep iterating keeping the best covering subreg index
+  // each time. Only the additional indexes are recorded in NeededIndexes;
+  // BestIdx itself is the return value.
+  LaneBitmask LanesLeft = LaneMask & ~getSubRegIndexLaneMask(BestIdx);
+  while (LanesLeft.any()) {
+    unsigned NextIdx = 0;
+    int NextCover = 0;
+    for (unsigned Idx : PossibleIndexes) {
+      LaneBitmask SubRegMask = getSubRegIndexLaneMask(Idx);
+      // Early exit if we found a perfect match.
+      if (SubRegMask == LanesLeft) {
+        NextIdx = Idx;
+        break;
+      }
+
+      // An index that covers none of the remaining lanes makes no progress;
+      // picking it would keep LanesLeft unchanged and loop forever.
+      if ((SubRegMask & LanesLeft).none())
+        continue;
+
+      // Try to cover as much of the remaining lanes as possible but
+      // as few of the already covered lanes as possible.
+      int Cover = (SubRegMask & LanesLeft).getNumLanes() -
+                  (SubRegMask & ~LanesLeft).getNumLanes();
+      if (NextIdx == 0 || Cover > NextCover) {
+        NextCover = Cover;
+        NextIdx = Idx;
+      }
+    }
+
+    if (NextIdx == 0)
+      return 0; // Impossible to handle
+
+    NeededIndexes.push_back(NextIdx);
+    LanesLeft &= ~getSubRegIndexLaneMask(NextIdx);
+  }
+
+  return BestIdx;
+}
+
 Register
 TargetRegisterInfo::lookThruCopyLike(Register SrcReg,
                                      const MachineRegisterInfo *MRI) const {