diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -40,6 +40,11 @@
   /// all elements of the inner vector combined give a full lane mask.
   static std::array<std::vector<int16_t>, 16> RegSplitParts;
 
+  // Table representing sub reg of given width and offset.
+  // First index is subreg size: 32, 64, 96, 128, 160, 192, 224, 256, 512.
+  // Second index is 32 different dword offsets.
+  static uint16_t SubRegFromChannelTable[9][32];
+
   void reserveRegisterTuples(BitVector &, MCRegister Reg) const;
 
 public:
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -40,6 +40,14 @@
     cl::init(true));
 
 std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
+uint16_t SIRegisterInfo::SubRegFromChannelTable[9][32];
+
+// Map numbers of DWORDs to indexes in SubRegFromChannelTable.
+// Valid indexes are shifted by 1, such that a 0 mapping means unsupported.
+// e.g. for 8 DWORDs (256-bit), SubRegFromChannelTableWidthMap[8] = 8,
+//      meaning index 7 in SubRegFromChannelTable.
+static const uint16_t SubRegFromChannelTableWidthMap[17] = {
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
 
 SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
     : AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour()), ST(ST),
@@ -78,8 +86,30 @@
     }
   };
 
+  static llvm::once_flag InitializeSubRegFromChannelTableFlag;
+
+  static auto InitializeSubRegFromChannelTableOnce = [this]() {
+    // Blank the whole table to NoSubRegister, then fill in every subreg
+    // index whose dword width is representable in SubRegFromChannelTable.
+    for (unsigned Width = 0; Width < array_lengthof(SubRegFromChannelTable);
+         ++Width)
+      for (unsigned Offset = 0;
+           Offset < array_lengthof(SubRegFromChannelTable[0]); ++Offset)
+        SubRegFromChannelTable[Width][Offset] = AMDGPU::NoSubRegister;
+
+    for (uint16_t Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
+      unsigned Width = AMDGPUSubRegIdxRanges[Idx].Size / 32;
+      unsigned Offset = AMDGPUSubRegIdxRanges[Idx].Offset / 32;
+      assert(Width < array_lengthof(SubRegFromChannelTableWidthMap));
+      Width = SubRegFromChannelTableWidthMap[Width];
+      if (Width == 0)
+        continue;
+      assert(Offset < array_lengthof(SubRegFromChannelTable[0]));
+      SubRegFromChannelTable[Width - 1][Offset] = Idx;
+    }
+  };
 
   llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
+  llvm::call_once(InitializeSubRegFromChannelTableFlag,
+                  InitializeSubRegFromChannelTableOnce);
 }
 
 void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
@@ -156,71 +186,13 @@
   return CSR_AMDGPU_AllAllocatableSRegs_RegMask;
 }
 
-// FIXME: TableGen should generate something to make this manageable for all
-// register classes. At a minimum we could use the opposite of
-// composeSubRegIndices and go up from the base 32-bit subreg.
 unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
                                               unsigned NumRegs) {
-  // Table of NumRegs sized pieces at every 32-bit offset.
- static const uint16_t SubRegFromChannelTable[][32] = { - {AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, - AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, - AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, - AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, - AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19, - AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23, - AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27, - AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31}, - {AMDGPU::sub0_sub1, AMDGPU::sub1_sub2, AMDGPU::sub2_sub3, - AMDGPU::sub3_sub4, AMDGPU::sub4_sub5, AMDGPU::sub5_sub6, - AMDGPU::sub6_sub7, AMDGPU::sub7_sub8, AMDGPU::sub8_sub9, - AMDGPU::sub9_sub10, AMDGPU::sub10_sub11, AMDGPU::sub11_sub12, - AMDGPU::sub12_sub13, AMDGPU::sub13_sub14, AMDGPU::sub14_sub15, - AMDGPU::sub15_sub16, AMDGPU::sub16_sub17, AMDGPU::sub17_sub18, - AMDGPU::sub18_sub19, AMDGPU::sub19_sub20, AMDGPU::sub20_sub21, - AMDGPU::sub21_sub22, AMDGPU::sub22_sub23, AMDGPU::sub23_sub24, - AMDGPU::sub24_sub25, AMDGPU::sub25_sub26, AMDGPU::sub26_sub27, - AMDGPU::sub27_sub28, AMDGPU::sub28_sub29, AMDGPU::sub29_sub30, - AMDGPU::sub30_sub31, AMDGPU::NoSubRegister}, - {AMDGPU::sub0_sub1_sub2, AMDGPU::sub1_sub2_sub3, - AMDGPU::sub2_sub3_sub4, AMDGPU::sub3_sub4_sub5, - AMDGPU::sub4_sub5_sub6, AMDGPU::sub5_sub6_sub7, - AMDGPU::sub6_sub7_sub8, AMDGPU::sub7_sub8_sub9, - AMDGPU::sub8_sub9_sub10, AMDGPU::sub9_sub10_sub11, - AMDGPU::sub10_sub11_sub12, AMDGPU::sub11_sub12_sub13, - AMDGPU::sub12_sub13_sub14, AMDGPU::sub13_sub14_sub15, - AMDGPU::sub14_sub15_sub16, AMDGPU::sub15_sub16_sub17, - AMDGPU::sub16_sub17_sub18, AMDGPU::sub17_sub18_sub19, - AMDGPU::sub18_sub19_sub20, AMDGPU::sub19_sub20_sub21, - AMDGPU::sub20_sub21_sub22, AMDGPU::sub21_sub22_sub23, - AMDGPU::sub22_sub23_sub24, AMDGPU::sub23_sub24_sub25, - AMDGPU::sub24_sub25_sub26, AMDGPU::sub25_sub26_sub27, - AMDGPU::sub26_sub27_sub28, AMDGPU::sub27_sub28_sub29, - 
AMDGPU::sub28_sub29_sub30, AMDGPU::sub29_sub30_sub31, - AMDGPU::NoSubRegister, AMDGPU::NoSubRegister}, - {AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2_sub3_sub4, - AMDGPU::sub2_sub3_sub4_sub5, AMDGPU::sub3_sub4_sub5_sub6, - AMDGPU::sub4_sub5_sub6_sub7, AMDGPU::sub5_sub6_sub7_sub8, - AMDGPU::sub6_sub7_sub8_sub9, AMDGPU::sub7_sub8_sub9_sub10, - AMDGPU::sub8_sub9_sub10_sub11, AMDGPU::sub9_sub10_sub11_sub12, - AMDGPU::sub10_sub11_sub12_sub13, AMDGPU::sub11_sub12_sub13_sub14, - AMDGPU::sub12_sub13_sub14_sub15, AMDGPU::sub13_sub14_sub15_sub16, - AMDGPU::sub14_sub15_sub16_sub17, AMDGPU::sub15_sub16_sub17_sub18, - AMDGPU::sub16_sub17_sub18_sub19, AMDGPU::sub17_sub18_sub19_sub20, - AMDGPU::sub18_sub19_sub20_sub21, AMDGPU::sub19_sub20_sub21_sub22, - AMDGPU::sub20_sub21_sub22_sub23, AMDGPU::sub21_sub22_sub23_sub24, - AMDGPU::sub22_sub23_sub24_sub25, AMDGPU::sub23_sub24_sub25_sub26, - AMDGPU::sub24_sub25_sub26_sub27, AMDGPU::sub25_sub26_sub27_sub28, - AMDGPU::sub26_sub27_sub28_sub29, AMDGPU::sub27_sub28_sub29_sub30, - AMDGPU::sub28_sub29_sub30_sub31, AMDGPU::NoSubRegister, - AMDGPU::NoSubRegister, AMDGPU::NoSubRegister}}; - - const unsigned NumRegIndex = NumRegs - 1; - - assert(NumRegIndex < array_lengthof(SubRegFromChannelTable) && - "Not implemented"); + assert(NumRegs < array_lengthof(SubRegFromChannelTableWidthMap)); + unsigned NumRegIndex = SubRegFromChannelTableWidthMap[NumRegs]; + assert(NumRegIndex && "Not implemented"); assert(Channel < array_lengthof(SubRegFromChannelTable[0])); - return SubRegFromChannelTable[NumRegIndex][Channel]; + return SubRegFromChannelTable[NumRegIndex - 1][Channel]; } MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg(