Index: llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -698,6 +698,8 @@ // 1 = Vector Register Class SmallVector RegSeqArgs(NumVectorElts * 2 + 1); + bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() == + Triple::amdgcn; RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); bool IsRegSeq = true; unsigned NOps = N->getNumOperands(); @@ -707,7 +709,8 @@ IsRegSeq = false; break; } - unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i); + unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i) + : R600RegisterInfo::getSubRegFromChannel(i); RegSeqArgs[1 + (2 * i)] = N->getOperand(i); RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32); } @@ -717,7 +720,8 @@ MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, EltVT); for (unsigned i = NOps; i < NumVectorElts; ++i) { - unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i); + unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i) + : R600RegisterInfo::getSubRegFromChannel(i); RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0); RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32); Index: llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h +++ llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.h @@ -26,10 +26,6 @@ struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo { AMDGPURegisterInfo(); - /// \returns the sub reg enum value for the given \p Channel - /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0) - static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1); - void reserveRegisterTuples(BitVector &, unsigned Reg) const; }; Index: llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPURegisterInfo.cpp @@ -21,61 +21,6 @@ AMDGPURegisterInfo::AMDGPURegisterInfo() : AMDGPUGenRegisterInfo(0) {} -// Table of NumRegs sized pieces at every 32-bit offset. -static const uint16_t SubRegFromChannelTable[][32] = { - { AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, - AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, - AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, - AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, - AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19, - AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23, - AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27, - AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31 - }, - { - AMDGPU::sub0_sub1, AMDGPU::sub1_sub2, AMDGPU::sub2_sub3, AMDGPU::sub3_sub4, - AMDGPU::sub4_sub5, AMDGPU::sub5_sub6, AMDGPU::sub6_sub7, AMDGPU::sub7_sub8, - AMDGPU::sub8_sub9, AMDGPU::sub9_sub10, AMDGPU::sub10_sub11, AMDGPU::sub11_sub12, - AMDGPU::sub12_sub13, AMDGPU::sub13_sub14, AMDGPU::sub14_sub15, AMDGPU::sub15_sub16, - AMDGPU::sub16_sub17, AMDGPU::sub17_sub18, AMDGPU::sub18_sub19, AMDGPU::sub19_sub20, - AMDGPU::sub20_sub21, AMDGPU::sub21_sub22, AMDGPU::sub22_sub23, AMDGPU::sub23_sub24, - AMDGPU::sub24_sub25, AMDGPU::sub25_sub26, AMDGPU::sub26_sub27, AMDGPU::sub27_sub28, - AMDGPU::sub28_sub29, AMDGPU::sub29_sub30, AMDGPU::sub30_sub31, AMDGPU::NoSubRegister - }, - { - AMDGPU::sub0_sub1_sub2, AMDGPU::sub1_sub2_sub3, AMDGPU::sub2_sub3_sub4, AMDGPU::sub3_sub4_sub5, - AMDGPU::sub4_sub5_sub6, AMDGPU::sub5_sub6_sub7, AMDGPU::sub6_sub7_sub8, AMDGPU::sub7_sub8_sub9, - AMDGPU::sub8_sub9_sub10, AMDGPU::sub9_sub10_sub11, AMDGPU::sub10_sub11_sub12, AMDGPU::sub11_sub12_sub13, - AMDGPU::sub12_sub13_sub14, AMDGPU::sub13_sub14_sub15, AMDGPU::sub14_sub15_sub16, AMDGPU::sub15_sub16_sub17, - AMDGPU::sub16_sub17_sub18, AMDGPU::sub17_sub18_sub19, AMDGPU::sub18_sub19_sub20, AMDGPU::sub19_sub20_sub21, - AMDGPU::sub20_sub21_sub22, AMDGPU::sub21_sub22_sub23, AMDGPU::sub22_sub23_sub24, AMDGPU::sub23_sub24_sub25, - AMDGPU::sub24_sub25_sub26, AMDGPU::sub25_sub26_sub27, AMDGPU::sub26_sub27_sub28, AMDGPU::sub27_sub28_sub29, - AMDGPU::sub28_sub29_sub30, AMDGPU::sub29_sub30_sub31, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister - }, - { - AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2_sub3_sub4, AMDGPU::sub2_sub3_sub4_sub5, AMDGPU::sub3_sub4_sub5_sub6, - AMDGPU::sub4_sub5_sub6_sub7, AMDGPU::sub5_sub6_sub7_sub8, AMDGPU::sub6_sub7_sub8_sub9, AMDGPU::sub7_sub8_sub9_sub10, - AMDGPU::sub8_sub9_sub10_sub11, AMDGPU::sub9_sub10_sub11_sub12, AMDGPU::sub10_sub11_sub12_sub13, AMDGPU::sub11_sub12_sub13_sub14, - AMDGPU::sub12_sub13_sub14_sub15, AMDGPU::sub13_sub14_sub15_sub16, AMDGPU::sub14_sub15_sub16_sub17, AMDGPU::sub15_sub16_sub17_sub18, - AMDGPU::sub16_sub17_sub18_sub19, AMDGPU::sub17_sub18_sub19_sub20, AMDGPU::sub18_sub19_sub20_sub21, AMDGPU::sub19_sub20_sub21_sub22, - AMDGPU::sub20_sub21_sub22_sub23, AMDGPU::sub21_sub22_sub23_sub24, AMDGPU::sub22_sub23_sub24_sub25, AMDGPU::sub23_sub24_sub25_sub26, - AMDGPU::sub24_sub25_sub26_sub27, AMDGPU::sub25_sub26_sub27_sub28, AMDGPU::sub26_sub27_sub28_sub29, AMDGPU::sub27_sub28_sub29_sub30, - AMDGPU::sub28_sub29_sub30_sub31, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister, AMDGPU::NoSubRegister - } -}; - -// FIXME: TableGen should generate something to make this manageable for all -// register classes. At a minimum we could use the opposite of -// composeSubRegIndices and go up from the base 32-bit subreg. -unsigned AMDGPURegisterInfo::getSubRegFromChannel(unsigned Channel, unsigned NumRegs) { - const unsigned NumRegIndex = NumRegs - 1; - - assert(NumRegIndex < array_lengthof(SubRegFromChannelTable) && - "Not implemented"); - assert(Channel < array_lengthof(SubRegFromChannelTable[0])); - return SubRegFromChannelTable[NumRegIndex][Channel]; -} - void AMDGPURegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const { MCRegAliasIterator R(Reg, this, true); Index: llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp =================================================================== --- llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp +++ llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp @@ -308,7 +308,7 @@ DstMI = Reg; else DstMI = TRI->getMatchingSuperReg(Reg, - AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)), + R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)), &R600::R600_Reg128RegClass); } if (MO.isUse()) { @@ -317,7 +317,7 @@ SrcMI = Reg; else SrcMI = TRI->getMatchingSuperReg(Reg, - AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)), + R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)), &R600::R600_Reg128RegClass); } } Index: llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp =================================================================== --- llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp +++ llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp @@ -219,13 +219,13 @@ } } if (IsReduction) { - unsigned SubRegIndex = AMDGPURegisterInfo::getSubRegFromChannel(Chan); + unsigned SubRegIndex = R600RegisterInfo::getSubRegFromChannel(Chan); Src0 = TRI.getSubReg(Src0, SubRegIndex); Src1 = TRI.getSubReg(Src1, SubRegIndex); } else if (IsCube) { static const int CubeSrcSwz[] = {2, 2, 0, 1}; - unsigned SubRegIndex0 = AMDGPURegisterInfo::getSubRegFromChannel(CubeSrcSwz[Chan]); - unsigned SubRegIndex1 = AMDGPURegisterInfo::getSubRegFromChannel(CubeSrcSwz[3 - Chan]); + unsigned SubRegIndex0 = R600RegisterInfo::getSubRegFromChannel(CubeSrcSwz[Chan]); + unsigned SubRegIndex1 = R600RegisterInfo::getSubRegFromChannel(CubeSrcSwz[3 - Chan]); Src1 = TRI.getSubReg(Src0, SubRegIndex1); Src0 = TRI.getSubReg(Src0, SubRegIndex0); } @@ -234,7 +234,7 @@ bool Mask = false; bool NotLast = true; if (IsCube) { - unsigned SubRegIndex = AMDGPURegisterInfo::getSubRegFromChannel(Chan); + unsigned SubRegIndex = R600RegisterInfo::getSubRegFromChannel(Chan); DstReg = TRI.getSubReg(DstReg, SubRegIndex); } else { // Mask the write if the original instruction does not write to Index: llvm/lib/Target/AMDGPU/R600InstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/R600InstrInfo.cpp +++ llvm/lib/Target/AMDGPU/R600InstrInfo.cpp @@ -77,7 +77,7 @@ if (VectorComponents > 0) { for (unsigned I = 0; I < VectorComponents; I++) { - unsigned SubRegIndex = AMDGPURegisterInfo::getSubRegFromChannel(I); + unsigned SubRegIndex = R600RegisterInfo::getSubRegFromChannel(I); buildDefaultInstruction(MBB, MI, R600::MOV, RI.getSubReg(DestReg, SubRegIndex), RI.getSubReg(SrcReg, SubRegIndex)) Index: llvm/lib/Target/AMDGPU/R600RegisterInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/R600RegisterInfo.h +++ llvm/lib/Target/AMDGPU/R600RegisterInfo.h @@ -24,6 +24,10 @@ R600RegisterInfo(); + /// \returns the sub reg enum value for the given \p Channel + /// (e.g. getSubRegFromChannel(0) -> R600::sub0) + static unsigned getSubRegFromChannel(unsigned Channel); + BitVector getReservedRegs(const MachineFunction &MF) const override; const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override; Register getFrameRegister(const MachineFunction &MF) const override; Index: llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp +++ llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp @@ -28,6 +28,18 @@ #define GET_REGINFO_TARGET_DESC #include "R600GenRegisterInfo.inc" +unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) { + static const uint16_t SubRegFromChannelTable[] = { + R600::sub0, R600::sub1, R600::sub2, R600::sub3, + R600::sub4, R600::sub5, R600::sub6, R600::sub7, + R600::sub8, R600::sub9, R600::sub10, R600::sub11, + R600::sub12, R600::sub13, R600::sub14, R600::sub15 + }; + + assert(Channel < array_lengthof(SubRegFromChannelTable)); + return SubRegFromChannelTable[Channel]; +} + BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); Index: llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp +++ llvm/lib/Target/AMDGPU/SIAddIMGInit.cpp @@ -154,7 +154,7 @@ BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewDst) .addReg(PrevDst) .addReg(SubReg) - .addImm(AMDGPURegisterInfo::getSubRegFromChannel(CurrIdx)); + .addImm(SIRegisterInfo::getSubRegFromChannel(CurrIdx)); PrevDst = NewDst; } Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3303,7 +3303,7 @@ if (Offset >= NumElts || Offset < 0) return std::make_pair(AMDGPU::sub0, Offset); - return std::make_pair(AMDGPURegisterInfo::getSubRegFromChannel(Offset), 0); + return std::make_pair(SIRegisterInfo::getSubRegFromChannel(Offset), 0); } // Return true if the index is an SGPR and was set. Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -42,6 +42,10 @@ public: SIRegisterInfo(const GCNSubtarget &ST); + /// \returns the sub reg enum value for the given \p Channel + /// (e.g. getSubRegFromChannel(0) -> AMDGPU::sub0) + static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs = 1); + bool spillSGPRToVGPR() const { return SpillSGPRToVGPR; } Index: llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -106,6 +106,73 @@ AGPRSetID < NumRegPressureSets); } +// FIXME: TableGen should generate something to make this manageable for all +// register classes. At a minimum we could use the opposite of +// composeSubRegIndices and go up from the base 32-bit subreg. +unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel, + unsigned NumRegs) { + // Table of NumRegs sized pieces at every 32-bit offset. + static const uint16_t SubRegFromChannelTable[][32] = { + {AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, + AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, + AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, + AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, + AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19, + AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23, + AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27, + AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31}, + {AMDGPU::sub0_sub1, AMDGPU::sub1_sub2, AMDGPU::sub2_sub3, + AMDGPU::sub3_sub4, AMDGPU::sub4_sub5, AMDGPU::sub5_sub6, + AMDGPU::sub6_sub7, AMDGPU::sub7_sub8, AMDGPU::sub8_sub9, + AMDGPU::sub9_sub10, AMDGPU::sub10_sub11, AMDGPU::sub11_sub12, + AMDGPU::sub12_sub13, AMDGPU::sub13_sub14, AMDGPU::sub14_sub15, + AMDGPU::sub15_sub16, AMDGPU::sub16_sub17, AMDGPU::sub17_sub18, + AMDGPU::sub18_sub19, AMDGPU::sub19_sub20, AMDGPU::sub20_sub21, + AMDGPU::sub21_sub22, AMDGPU::sub22_sub23, AMDGPU::sub23_sub24, + AMDGPU::sub24_sub25, AMDGPU::sub25_sub26, AMDGPU::sub26_sub27, + AMDGPU::sub27_sub28, AMDGPU::sub28_sub29, AMDGPU::sub29_sub30, + AMDGPU::sub30_sub31, AMDGPU::NoSubRegister}, + {AMDGPU::sub0_sub1_sub2, AMDGPU::sub1_sub2_sub3, + AMDGPU::sub2_sub3_sub4, AMDGPU::sub3_sub4_sub5, + AMDGPU::sub4_sub5_sub6, AMDGPU::sub5_sub6_sub7, + AMDGPU::sub6_sub7_sub8, AMDGPU::sub7_sub8_sub9, + AMDGPU::sub8_sub9_sub10, AMDGPU::sub9_sub10_sub11, + AMDGPU::sub10_sub11_sub12, AMDGPU::sub11_sub12_sub13, + AMDGPU::sub12_sub13_sub14, AMDGPU::sub13_sub14_sub15, + AMDGPU::sub14_sub15_sub16, AMDGPU::sub15_sub16_sub17, + AMDGPU::sub16_sub17_sub18, AMDGPU::sub17_sub18_sub19, + AMDGPU::sub18_sub19_sub20, AMDGPU::sub19_sub20_sub21, + AMDGPU::sub20_sub21_sub22, AMDGPU::sub21_sub22_sub23, + AMDGPU::sub22_sub23_sub24, AMDGPU::sub23_sub24_sub25, + AMDGPU::sub24_sub25_sub26, AMDGPU::sub25_sub26_sub27, + AMDGPU::sub26_sub27_sub28, AMDGPU::sub27_sub28_sub29, + AMDGPU::sub28_sub29_sub30, AMDGPU::sub29_sub30_sub31, + AMDGPU::NoSubRegister, AMDGPU::NoSubRegister}, + {AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2_sub3_sub4, + AMDGPU::sub2_sub3_sub4_sub5, AMDGPU::sub3_sub4_sub5_sub6, + AMDGPU::sub4_sub5_sub6_sub7, AMDGPU::sub5_sub6_sub7_sub8, + AMDGPU::sub6_sub7_sub8_sub9, AMDGPU::sub7_sub8_sub9_sub10, + AMDGPU::sub8_sub9_sub10_sub11, AMDGPU::sub9_sub10_sub11_sub12, + AMDGPU::sub10_sub11_sub12_sub13, AMDGPU::sub11_sub12_sub13_sub14, + AMDGPU::sub12_sub13_sub14_sub15, AMDGPU::sub13_sub14_sub15_sub16, + AMDGPU::sub14_sub15_sub16_sub17, AMDGPU::sub15_sub16_sub17_sub18, + AMDGPU::sub16_sub17_sub18_sub19, AMDGPU::sub17_sub18_sub19_sub20, + AMDGPU::sub18_sub19_sub20_sub21, AMDGPU::sub19_sub20_sub21_sub22, + AMDGPU::sub20_sub21_sub22_sub23, AMDGPU::sub21_sub22_sub23_sub24, + AMDGPU::sub22_sub23_sub24_sub25, AMDGPU::sub23_sub24_sub25_sub26, + AMDGPU::sub24_sub25_sub26_sub27, AMDGPU::sub25_sub26_sub27_sub28, + AMDGPU::sub26_sub27_sub28_sub29, AMDGPU::sub27_sub28_sub29_sub30, + AMDGPU::sub28_sub29_sub30_sub31, AMDGPU::NoSubRegister, + AMDGPU::NoSubRegister, AMDGPU::NoSubRegister}}; + + const unsigned NumRegIndex = NumRegs - 1; + + assert(NumRegIndex < array_lengthof(SubRegFromChannelTable) && + "Not implemented"); + assert(Channel < array_lengthof(SubRegFromChannelTable[0])); + return SubRegFromChannelTable[NumRegIndex][Channel]; +} + unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg( const MachineFunction &MF) const { unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;