Index: llvm/include/llvm/CodeGen/TargetRegisterInfo.h =================================================================== --- llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -628,6 +628,14 @@ return RC; } + /// Return a register class that can be used for a subregister copy from/into + /// \p SuperRC at \p SubRegIdx. + virtual const TargetRegisterClass * + getSubRegisterClass(const TargetRegisterClass *SuperRC, + unsigned SubRegIdx) const { + return nullptr; + } + /// Return the subregister index you get from composing /// two subregister indices. /// Index: llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -277,7 +277,8 @@ assert(TRI->isSGPRClass(SrcRC) && "Expected SGPR REG_SEQUENCE to only have SGPR inputs"); - SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg); + if (SrcSubReg) + SrcRC = TRI->getSubRegisterClass(SrcRC, SrcSubReg); const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC); Register TmpReg = MRI.createVirtualRegister(NewSrcRC); @@ -1133,7 +1134,8 @@ Register SrcReg = MI->getOperand(1).getReg(); unsigned SubReg = MI->getOperand(1).getSubReg(); const TargetRegisterClass *SrcRC = TRI->getRegClassForReg(*MRI, SrcReg); - SrcRC = TRI->getSubRegClass(SrcRC, SubReg); + if (SubReg) + SrcRC = TRI->getSubRegisterClass(SrcRC, SubReg); size_t SrcSize = TRI->getRegSizeInBits(*SrcRC); if (SrcSize == 16) { // HACK to handle possible 16bit VGPR source Index: llvm/lib/Target/AMDGPU/SIFoldOperands.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -897,7 +897,9 @@ TRI->getRegClassForReg(*MRI, OpToFold.getReg()); if (TRI->hasVectorRegisters(RC) && OpToFold.getSubReg()) { unsigned SubReg = OpToFold.getSubReg(); - const TargetRegisterClass 
*SubRC = TRI->getSubRegClass(RC, SubReg); + const TargetRegisterClass *SubRC = RC; + if (SubReg) + SubRC = TRI->getSubRegisterClass(RC, SubReg); RC = TRI->getCompatibleSubRegClass(RC, SubRC, SubReg); if (RC) RC = SubRC; Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4071,9 +4071,9 @@ : &AMDGPU::VReg_64RegClass; const TargetRegisterClass *Src0SubRC = - TRI->getSubRegClass(Src0RC, AMDGPU::sub0); + TRI->getSubRegisterClass(Src0RC, AMDGPU::sub0); const TargetRegisterClass *Src1SubRC = - TRI->getSubRegClass(Src1RC, AMDGPU::sub1); + TRI->getSubRegisterClass(Src1RC, AMDGPU::sub1); MachineOperand SrcReg0Sub0 = TII->buildExtractSubRegOrImm( MI, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC); @@ -4159,7 +4159,7 @@ .addImm(0); } else { const TargetRegisterClass *SubRC = - TRI->getSubRegClass(Src2RC, AMDGPU::sub0); + TRI->getSubRegisterClass(Src2RC, AMDGPU::sub0); MachineOperand Src2Sub0 = TII->buildExtractSubRegOrImm( MII, MRI, Src2, Src2RC, AMDGPU::sub0, SubRC); MachineOperand Src2Sub1 = TII->buildExtractSubRegOrImm( Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4105,7 +4105,7 @@ const TargetRegisterClass *RC = RI.getRegClassForReg(MRI, Reg); if (RI.hasVectorRegisters(RC) && MO.getSubReg()) { const TargetRegisterClass *SubRC = - RI.getSubRegClass(RC, MO.getSubReg()); + RI.getSubRegisterClass(RC, MO.getSubReg()); RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg()); if (RC) RC = SubRC; @@ -6721,14 +6721,16 @@ MRI.getRegClass(Src0.getReg()) : &AMDGPU::SGPR_32RegClass; - const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); + const TargetRegisterClass *Src0SubRC = + RI.getSubRegisterClass(Src0RC, AMDGPU::sub0); MachineOperand 
SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC); const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); - const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); + const TargetRegisterClass *NewDestSubRC = + RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0); Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC); MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0); @@ -6785,8 +6787,10 @@ const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg()); const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg()); - const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); - const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0); + const TargetRegisterClass *Src0SubRC = + RI.getSubRegisterClass(Src0RC, AMDGPU::sub0); + const TargetRegisterClass *Src1SubRC = + RI.getSubRegisterClass(Src1RC, AMDGPU::sub0); MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC); @@ -6851,12 +6855,14 @@ MRI.getRegClass(Src0.getReg()) : &AMDGPU::SGPR_32RegClass; - const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); + const TargetRegisterClass *Src0SubRC = + RI.getSubRegisterClass(Src0RC, AMDGPU::sub0); const TargetRegisterClass *Src1RC = Src1.isReg() ? 
MRI.getRegClass(Src1.getReg()) : &AMDGPU::SGPR_32RegClass; - const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0); + const TargetRegisterClass *Src1SubRC = + RI.getSubRegisterClass(Src1RC, AMDGPU::sub0); MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC); @@ -6869,7 +6875,8 @@ const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); - const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); + const TargetRegisterClass *NewDestSubRC = + RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0); Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC); MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0) @@ -6958,7 +6965,8 @@ Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0); + const TargetRegisterClass *SrcSubRC = + RI.getSubRegisterClass(SrcRC, AMDGPU::sub0); MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub0, SrcSubRC); Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -247,12 +247,6 @@ const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const; - /// \returns The canonical register class that is used for a sub-register of - /// \p RC for the given \p SubIdx. If \p SubIdx equals NoSubRegister, \p RC - /// will be returned. - const TargetRegisterClass *getSubRegClass(const TargetRegisterClass *RC, - unsigned SubIdx) const; - /// Returns a register class which is compatible with \p SuperRC, such that a /// subregister exists with class \p SubRC with subregister index \p /// SubIdx. 
If this is impossible (e.g., an unaligned subregister index within Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -2723,26 +2723,6 @@ return SRC; } -const TargetRegisterClass *SIRegisterInfo::getSubRegClass( - const TargetRegisterClass *RC, unsigned SubIdx) const { - if (SubIdx == AMDGPU::NoSubRegister) - return RC; - - // We can assume that each lane corresponds to one 32-bit register. - unsigned Size = getNumChannelsFromSubReg(SubIdx) * 32; - if (isAGPRClass(RC)) { - RC = getAGPRClassForBitWidth(Size); - } else if (isVGPRClass(RC)) { - RC = getVGPRClassForBitWidth(Size); - } else if (isVectorSuperClass(RC)) { - RC = getVectorSuperClassForBitWidth(Size); - } else { - RC = getSGPRClassForBitWidth(Size); - } - assert(RC && "Invalid sub-register class size"); - return RC; -} - const TargetRegisterClass * SIRegisterInfo::getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, Index: llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -1454,7 +1454,7 @@ const TargetRegisterClass *regClass = Reg.isVirtual() ? 
MRI->getRegClass(Reg) : TRI->getPhysRegClass(Reg); if (SubReg) - regClass = TRI->getSubRegClass(regClass, SubReg); + regClass = TRI->getSubRegisterClass(regClass, SubReg); const unsigned MovOp = TII->getMovOpcode(regClass); MI->setDesc(TII->get(MovOp)); Index: llvm/utils/TableGen/RegisterInfoEmitter.cpp =================================================================== --- llvm/utils/TableGen/RegisterInfoEmitter.cpp +++ llvm/utils/TableGen/RegisterInfoEmitter.cpp @@ -1169,6 +1169,8 @@ << " LaneBitmask reverseComposeSubRegIndexLaneMaskImpl" << "(unsigned, LaneBitmask) const override;\n" << " const TargetRegisterClass *getSubClassWithSubReg" + << "(const TargetRegisterClass *, unsigned) const override;\n" + << " const TargetRegisterClass *getSubRegisterClass" << "(const TargetRegisterClass *, unsigned) const override;\n"; } OS << " const RegClassWeight &getRegClassWeight(" @@ -1511,8 +1513,8 @@ emitComposeSubRegIndexLaneMask(OS, RegBank, ClassName); } - // Emit getSubClassWithSubReg. if (!SubRegIndices.empty()) { + // Emit getSubClassWithSubReg. OS << "const TargetRegisterClass *" << ClassName << "::getSubClassWithSubReg(const TargetRegisterClass *RC, unsigned Idx)" << " const {\n"; @@ -1541,6 +1543,53 @@ << " assert(Idx < " << SubRegIndicesSize << " && \"Bad subreg\");\n" << " unsigned TV = Table[RC->getID()][Idx];\n" << " return TV ? getRegClass(TV - 1) : nullptr;\n}\n\n"; + + // Emit getSubRegisterClass + OS << "const TargetRegisterClass *" << ClassName + << "::getSubRegisterClass(const TargetRegisterClass *RC, unsigned Idx)" + << " const {\n"; + + // Use the smallest type that can hold a regclass ID with room for a + // sentinel. 
+ if (RegisterClasses.size() < UINT8_MAX) + OS << " static const uint8_t Table["; + else if (RegisterClasses.size() < UINT16_MAX) + OS << " static const uint16_t Table["; + else + PrintFatalError("Too many register classes."); + + OS << RegisterClasses.size() << "][" << SubRegIndicesSize << "] = {\n"; + + for (const auto &RC : RegisterClasses) { + OS << " {\t// " << RC.getName() << '\n'; + for (auto &Idx : SubRegIndices) { + Optional<std::pair<CodeGenRegisterClass *, CodeGenRegisterClass *>> + MatchingSubClass = RC.getMatchingSubClassWithSubRegs(RegBank, &Idx); + + unsigned EnumValue = 0; + if (MatchingSubClass) { + CodeGenRegisterClass *SubRegClass = MatchingSubClass->second; + EnumValue = SubRegClass->EnumValue + 1; + } + + OS << " " << EnumValue << ",\t// " + << RC.getName() << ':' << Idx.getName(); + + if (MatchingSubClass) { + CodeGenRegisterClass *SubRegClass = MatchingSubClass->second; + OS << " -> " << SubRegClass->getName(); + } + + OS << '\n'; + } + + OS << " },\n"; + } + OS << " };\n assert(RC && \"Missing regclass\");\n" + << " if (!Idx) return RC;\n --Idx;\n" + << " assert(Idx < " << SubRegIndicesSize << " && \"Bad subreg\");\n" + << " unsigned TV = Table[RC->getID()][Idx];\n" + << " return TV ? getRegClass(TV - 1) : nullptr;\n}\n\n"; } EmitRegUnitPressure(OS, RegBank, ClassName);