Index: llvm/include/llvm/CodeGen/TargetRegisterInfo.h =================================================================== --- llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -628,6 +628,14 @@ return RC; } + /// Return a register class that can be used for a subregister copy from/into + /// \p SuperRC at \p SubRegIdx. + virtual const TargetRegisterClass * + getSubRegisterClass(const TargetRegisterClass *SuperRC, + unsigned SubRegIdx) const { + return nullptr; + } + /// Return the subregister index you get from composing /// two subregister indices. /// Index: llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -270,14 +270,10 @@ bool IsAGPR = TRI->isAGPRClass(DstRC); for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) { - Register SrcReg = MI.getOperand(I).getReg(); - unsigned SrcSubReg = MI.getOperand(I).getSubReg(); - - const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg); + const TargetRegisterClass *SrcRC = + TRI->getRegClassForOperandReg(MRI, MI.getOperand(I)); assert(TRI->isSGPRClass(SrcRC) && "Expected SGPR REG_SEQUENCE to only have SGPR inputs"); - - SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg); const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC); Register TmpReg = MRI.createVirtualRegister(NewSrcRC); @@ -1132,8 +1128,8 @@ Register DstReg = MI->getOperand(0).getReg(); Register SrcReg = MI->getOperand(1).getReg(); unsigned SubReg = MI->getOperand(1).getSubReg(); - const TargetRegisterClass *SrcRC = TRI->getRegClassForReg(*MRI, SrcReg); - SrcRC = TRI->getSubRegClass(SrcRC, SubReg); + const TargetRegisterClass *SrcRC = + TRI->getRegClassForOperandReg(*MRI, MI->getOperand(1)); size_t SrcSize = TRI->getRegSizeInBits(*SrcRC); if (SrcSize == 16) { // HACK to handle possible 16bit VGPR source Index: 
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -897,9 +897,8 @@ TRI->getRegClassForReg(*MRI, OpToFold.getReg()); if (TRI->hasVectorRegisters(RC) && OpToFold.getSubReg()) { unsigned SubReg = OpToFold.getSubReg(); - const TargetRegisterClass *SubRC = TRI->getSubRegClass(RC, SubReg); - RC = TRI->getCompatibleSubRegClass(RC, SubRC, SubReg); - if (RC) + if (const TargetRegisterClass *SubRC = + TRI->getSubRegisterClass(RC, SubReg)) RC = SubRC; } Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4071,9 +4071,9 @@ : &AMDGPU::VReg_64RegClass; const TargetRegisterClass *Src0SubRC = - TRI->getSubRegClass(Src0RC, AMDGPU::sub0); + TRI->getSubRegisterClass(Src0RC, AMDGPU::sub0); const TargetRegisterClass *Src1SubRC = - TRI->getSubRegClass(Src1RC, AMDGPU::sub1); + TRI->getSubRegisterClass(Src1RC, AMDGPU::sub1); MachineOperand SrcReg0Sub0 = TII->buildExtractSubRegOrImm( MI, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC); @@ -4159,7 +4159,7 @@ .addImm(0); } else { const TargetRegisterClass *SubRC = - TRI->getSubRegClass(Src2RC, AMDGPU::sub0); + TRI->getSubRegisterClass(Src2RC, AMDGPU::sub0); MachineOperand Src2Sub0 = TII->buildExtractSubRegOrImm( MII, MRI, Src2, Src2RC, AMDGPU::sub0, SubRC); MachineOperand Src2Sub1 = TII->buildExtractSubRegOrImm( Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4105,7 +4105,7 @@ const TargetRegisterClass *RC = RI.getRegClassForReg(MRI, Reg); if (RI.hasVectorRegisters(RC) && MO.getSubReg()) { const TargetRegisterClass *SubRC = - RI.getSubRegClass(RC, MO.getSubReg()); + 
RI.getSubRegisterClass(RC, MO.getSubReg()); RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg()); if (RC) RC = SubRC; @@ -6721,14 +6721,16 @@ MRI.getRegClass(Src0.getReg()) : &AMDGPU::SGPR_32RegClass; - const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); + const TargetRegisterClass *Src0SubRC = + RI.getSubRegisterClass(Src0RC, AMDGPU::sub0); MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC); const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); - const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); + const TargetRegisterClass *NewDestSubRC = + RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0); Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC); MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0); @@ -6785,8 +6787,10 @@ const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg()); const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg()); - const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); - const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0); + const TargetRegisterClass *Src0SubRC = + RI.getSubRegisterClass(Src0RC, AMDGPU::sub0); + const TargetRegisterClass *Src1SubRC = + RI.getSubRegisterClass(Src1RC, AMDGPU::sub0); MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC); @@ -6851,12 +6855,14 @@ MRI.getRegClass(Src0.getReg()) : &AMDGPU::SGPR_32RegClass; - const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); + const TargetRegisterClass *Src0SubRC = + RI.getSubRegisterClass(Src0RC, AMDGPU::sub0); const TargetRegisterClass *Src1RC = Src1.isReg() ? 
MRI.getRegClass(Src1.getReg()) : &AMDGPU::SGPR_32RegClass; - const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0); + const TargetRegisterClass *Src1SubRC = + RI.getSubRegisterClass(Src1RC, AMDGPU::sub0); MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC); @@ -6869,7 +6875,8 @@ const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC); - const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0); + const TargetRegisterClass *NewDestSubRC = + RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0); Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC); MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0) @@ -6958,7 +6965,8 @@ Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); - const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0); + const TargetRegisterClass *SrcSubRC = + RI.getSubRegisterClass(SrcRC, AMDGPU::sub0); MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, AMDGPU::sub0, SrcSubRC); Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -247,12 +247,6 @@ const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const; - /// \returns The canonical register class that is used for a sub-register of - /// \p RC for the given \p SubIdx. If \p SubIdx equals NoSubRegister, \p RC - /// will be returned. - const TargetRegisterClass *getSubRegClass(const TargetRegisterClass *RC, - unsigned SubIdx) const; - /// Returns a register class which is compatible with \p SuperRC, such that a /// subregister exists with class \p SubRC with subregister index \p /// SubIdx. 
If this is impossible (e.g., an unaligned subregister index within @@ -283,6 +277,10 @@ const TargetRegisterClass *getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const; + const TargetRegisterClass * + getRegClassForOperandReg(const MachineRegisterInfo &MRI, + const MachineOperand &MO) const; + bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const; bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const; bool isVectorRegister(const MachineRegisterInfo &MRI, Register Reg) const { Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -2723,26 +2723,6 @@ return SRC; } -const TargetRegisterClass *SIRegisterInfo::getSubRegClass( - const TargetRegisterClass *RC, unsigned SubIdx) const { - if (SubIdx == AMDGPU::NoSubRegister) - return RC; - - // We can assume that each lane corresponds to one 32-bit register. - unsigned Size = getNumChannelsFromSubReg(SubIdx) * 32; - if (isAGPRClass(RC)) { - RC = getAGPRClassForBitWidth(Size); - } else if (isVGPRClass(RC)) { - RC = getVGPRClassForBitWidth(Size); - } else if (isVectorSuperClass(RC)) { - RC = getVectorSuperClassForBitWidth(Size); - } else { - RC = getSGPRClassForBitWidth(Size); - } - assert(RC && "Invalid sub-register class size"); - return RC; -} - const TargetRegisterClass * SIRegisterInfo::getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, @@ -2833,6 +2813,13 @@ return Reg.isVirtual() ? 
MRI.getRegClass(Reg) : getPhysRegClass(Reg); } +const TargetRegisterClass * +SIRegisterInfo::getRegClassForOperandReg(const MachineRegisterInfo &MRI, + const MachineOperand &MO) const { + const TargetRegisterClass *SrcRC = getRegClassForReg(MRI, MO.getReg()); + return getSubRegisterClass(SrcRC, MO.getSubReg()); +} + bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI, Register Reg) const { const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg); Index: llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -1448,14 +1448,10 @@ assert(MI->getNumExplicitOperands() == 2); const Register Reg = MI->getOperand(0).getReg(); - const unsigned SubReg = MI->getOperand(0).getSubReg(); - - if (TRI->isVGPR(*MRI, Reg)) { - const TargetRegisterClass *regClass = - Reg.isVirtual() ? MRI->getRegClass(Reg) : TRI->getPhysRegClass(Reg); - if (SubReg) - regClass = TRI->getSubRegClass(regClass, SubReg); + const TargetRegisterClass *regClass = + TRI->getRegClassForOperandReg(*MRI, MI->getOperand(0)); + if (TRI->isVGPRClass(regClass)) { const unsigned MovOp = TII->getMovOpcode(regClass); MI->setDesc(TII->get(MovOp)); Index: llvm/utils/TableGen/RegisterInfoEmitter.cpp =================================================================== --- llvm/utils/TableGen/RegisterInfoEmitter.cpp +++ llvm/utils/TableGen/RegisterInfoEmitter.cpp @@ -1169,6 +1169,8 @@ << " LaneBitmask reverseComposeSubRegIndexLaneMaskImpl" << "(unsigned, LaneBitmask) const override;\n" << " const TargetRegisterClass *getSubClassWithSubReg" + << "(const TargetRegisterClass *, unsigned) const override;\n" + << " const TargetRegisterClass *getSubRegisterClass" << "(const TargetRegisterClass *, unsigned) const override;\n"; } OS << " const RegClassWeight &getRegClassWeight(" @@ -1511,16 +1513,16 @@ emitComposeSubRegIndexLaneMask(OS, RegBank, ClassName); } - // Emit 
getSubClassWithSubReg. if (!SubRegIndices.empty()) { + // Emit getSubClassWithSubReg. OS << "const TargetRegisterClass *" << ClassName << "::getSubClassWithSubReg(const TargetRegisterClass *RC, unsigned Idx)" << " const {\n"; // Use the smallest type that can hold a regclass ID with room for a // sentinel. - if (RegisterClasses.size() < UINT8_MAX) + if (RegisterClasses.size() <= UINT8_MAX) OS << " static const uint8_t Table["; - else if (RegisterClasses.size() < UINT16_MAX) + else if (RegisterClasses.size() <= UINT16_MAX) OS << " static const uint16_t Table["; else PrintFatalError("Too many register classes."); @@ -1541,6 +1543,53 @@ << " assert(Idx < " << SubRegIndicesSize << " && \"Bad subreg\");\n" << " unsigned TV = Table[RC->getID()][Idx];\n" << " return TV ? getRegClass(TV - 1) : nullptr;\n}\n\n"; + + // Emit getSubRegisterClass + OS << "const TargetRegisterClass *" << ClassName + << "::getSubRegisterClass(const TargetRegisterClass *RC, unsigned Idx)" + << " const {\n"; + + // Use the smallest type that can hold a regclass ID with room for a + // sentinel. 
+ if (RegisterClasses.size() <= UINT8_MAX) + OS << " static const uint8_t Table["; + else if (RegisterClasses.size() <= UINT16_MAX) + OS << " static const uint16_t Table["; + else + PrintFatalError("Too many register classes."); + + OS << RegisterClasses.size() << "][" << SubRegIndicesSize << "] = {\n"; + + for (const auto &RC : RegisterClasses) { + OS << " {\t// " << RC.getName() << '\n'; + for (auto &Idx : SubRegIndices) { + Optional<std::pair<CodeGenSubRegIndex *, CodeGenRegisterClass *>> + MatchingSubClass = RC.getMatchingSubClassWithSubRegs(RegBank, &Idx); + + unsigned EnumValue = 0; + if (MatchingSubClass) { + CodeGenRegisterClass *SubRegClass = MatchingSubClass->second; + EnumValue = SubRegClass->EnumValue + 1; + } + + OS << " " << EnumValue << ",\t// " + << RC.getName() << ':' << Idx.getName(); + + if (MatchingSubClass) { + CodeGenRegisterClass *SubRegClass = MatchingSubClass->second; + OS << " -> " << SubRegClass->getName(); + } + + OS << '\n'; + } + + OS << " },\n"; + } + OS << " };\n assert(RC && \"Missing regclass\");\n" + << " if (!Idx) return RC;\n --Idx;\n" + << " assert(Idx < " << SubRegIndicesSize << " && \"Bad subreg\");\n" + << " unsigned TV = Table[RC->getID()][Idx];\n" + << " return TV ? getRegClass(TV - 1) : nullptr;\n}\n\n"; }