Index: llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp
+++ llvm/lib/Target/AMDGPU/GCNRegBankReassign.cpp
@@ -372,19 +372,19 @@
     unsigned ShiftedBank = Bank;
 
     if (Bank != -1 && R == Reg && Op.getSubReg()) {
-      unsigned LM = TRI->getSubRegIndexLaneMask(Op.getSubReg()).getAsInteger();
-      if (!(LM & 1) && (Bank < NUM_VGPR_BANKS)) {
+      unsigned Offset = TRI->getChannelFromSubReg(Op.getSubReg());
+      LaneBitmask LM = TRI->getSubRegIndexLaneMask(Op.getSubReg());
+      if (Offset && Bank < NUM_VGPR_BANKS) {
         // If a register spans all banks we cannot shift it to avoid conflict.
-        if (countPopulation(LM) >= NUM_VGPR_BANKS)
+        if (TRI->getNumCoveredRegs(LM) >= NUM_VGPR_BANKS)
           continue;
-        ShiftedBank = (Bank + countTrailingZeros(LM)) % NUM_VGPR_BANKS;
-      } else if (!(LM & 3) && (Bank >= SGPR_BANK_OFFSET)) {
+        ShiftedBank = (Bank + Offset) % NUM_VGPR_BANKS;
+      } else if (Offset > 1 && Bank >= SGPR_BANK_OFFSET) {
         // If a register spans all banks we cannot shift it to avoid conflict.
-        if (countPopulation(LM) / 2 >= NUM_SGPR_BANKS)
+        if (TRI->getNumCoveredRegs(LM) / 2 >= NUM_SGPR_BANKS)
           continue;
-        ShiftedBank = SGPR_BANK_OFFSET + (Bank - SGPR_BANK_OFFSET +
-                                          (countTrailingZeros(LM) >> 1)) %
-                                             NUM_SGPR_BANKS;
+        ShiftedBank = SGPR_BANK_OFFSET +
+          (Bank - SGPR_BANK_OFFSET + (Offset >> 1)) % NUM_SGPR_BANKS;
       }
     }
 
@@ -496,16 +496,16 @@
 
   unsigned FreeBanks = getFreeBanks(Mask, UsedBanks);
 
-  unsigned LM = TRI->getSubRegIndexLaneMask(SubReg).getAsInteger();
-  if (!(LM & 1) && (Mask & VGPR_BANK_MASK)) {
-    unsigned Shift = countTrailingZeros(LM);
+  unsigned Offset = TRI->getChannelFromSubReg(SubReg);
+  if (Offset && (Mask & VGPR_BANK_MASK)) {
+    unsigned Shift = Offset;
     if (Shift >= NUM_VGPR_BANKS)
       return 0;
     unsigned VB = FreeBanks & VGPR_BANK_MASK;
     FreeBanks = ((VB >> Shift) | (VB << (NUM_VGPR_BANKS - Shift))) &
                 VGPR_BANK_MASK;
-  } else if (!(LM & 3) && (Mask & SGPR_BANK_MASK)) {
-    unsigned Shift = countTrailingZeros(LM) >> 1;
+  } else if (Offset > 1 && (Mask & SGPR_BANK_MASK)) {
+    unsigned Shift = Offset >> 1;
     if (Shift >= NUM_SGPR_BANKS)
       return 0;
     unsigned SB = FreeBanks >> SGPR_BANK_OFFSET;
Index: llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -129,7 +129,7 @@
     assert(PrevMask < NewMask);
 
     Value[Kind == SGPR_TUPLE ? SGPR32 : Kind == AGPR_TUPLE ? AGPR32 : VGPR32] +=
-      Sign * (~PrevMask & NewMask).getNumLanes();
+      Sign * SIRegisterInfo::getNumCoveredRegs(~PrevMask & NewMask);
 
     if (PrevMask.none()) {
       assert(NewMask.any());
@@ -221,7 +221,7 @@
     return MRI.getTargetRegisterInfo()->getSubRegIndexLaneMask(SubReg);
 
   auto MaxMask = MRI.getMaxLaneMaskForVReg(MO.getReg());
-  if (MaxMask == LaneBitmask::getLane(0)) // cannot have subregs
+  if (SIRegisterInfo::getNumCoveredRegs(MaxMask) == 1) // cannot have subregs
     return MaxMask;
 
   // For a tentative schedule LIS isn't updated yet but livemask should remain
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -831,7 +831,7 @@
                                    MI.getParent()->getParent()->getRegInfo().
                                      getRegClass(MO.getReg()), SubReg)) >= 32 &&
                "Sub-dword subregs are not supported");
-        return RI.getSubRegIndexLaneMask(SubReg).getNumLanes() * 4;
+        return RI.getNumChannelesFromSubReg(SubReg) * 4;
       }
     }
     return RI.getRegSizeInBits(*getOpRegClass(MI, OpNo)) / 8;
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -798,7 +798,7 @@
       int64_t IdxValue = Idx == 0 ? Value : 0;
 
       MachineInstrBuilder Builder = BuildMI(MBB, MI, DL,
-        get(Opcode), RI.getSubReg(DestReg, Idx));
+        get(Opcode), RI.getSubReg(DestReg, SubIndices[Idx]));
       Builder.addImm(IdxValue);
     }
   }
Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.h
===================================================================
--- llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -289,6 +289,21 @@
   const uint32_t *getAllVGPRRegMask() const;
   const uint32_t *getAllAllocatableSRegMask() const;
 
+  // \returns the number of 32-bit registers covered by lane mask \p LM
+  static unsigned getNumCoveredRegs(LaneBitmask LM) {
+    return LM.getNumLanes();
+  }
+
+  // \returns the DWORD offset of \p SubReg, or 0 if \p SubReg is 0
+  unsigned getChannelFromSubReg(unsigned SubReg) const {
+    return SubReg ? alignTo(getSubRegIdxOffset(SubReg), 32) / 32 : 0;
+  }
+
+  // \returns the size of \p SubReg in DWORDs
+  unsigned getNumChannelesFromSubReg(unsigned SubReg) const {
+    return getNumCoveredRegs(getSubRegIndexLaneMask(SubReg));
+  }
+
 private:
   void buildSpillLoadStore(MachineBasicBlock::iterator MI,
                            unsigned LoadStoreOp,
Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1391,7 +1391,7 @@
     return RC;
 
   // We can assume that each lane corresponds to one 32-bit register.
-  unsigned Count = getSubRegIndexLaneMask(SubIdx).getNumLanes();
+  unsigned Count = getNumChannelesFromSubReg(SubIdx);
   if (isSGPRClass(RC)) {
     switch (Count) {
     case 1:
Index: llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -426,8 +426,7 @@
     if (Register::isPhysicalRegister(Reg)) {
       Reg = TRI.getSubReg(Reg, TRI.getSubRegFromChannel(I));
     } else {
-      LaneBitmask LM = TRI.getSubRegIndexLaneMask(Sub);
-      Sub = TRI.getSubRegFromChannel(I + countTrailingZeros(LM.getAsInteger()));
+      Sub = TRI.getSubRegFromChannel(I + TRI.getChannelFromSubReg(Sub));
     }
   }
   return TargetInstrInfo::RegSubRegPair(Reg, Sub);
Index: llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -960,7 +960,7 @@
 
 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
   const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
-  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
+  const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
   return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
     Reg == AMDGPU::SCC;
 }