Index: lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIInstrInfo.cpp +++ lib/Target/AMDGPU/SIInstrInfo.cpp @@ -349,51 +349,17 @@ MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { + const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg); - static const int16_t Sub0_15[] = { - AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, - AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, - AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, - AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, - }; - - static const int16_t Sub0_15_64[] = { - AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, - AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, - AMDGPU::sub8_sub9, AMDGPU::sub10_sub11, - AMDGPU::sub12_sub13, AMDGPU::sub14_sub15, - }; - - static const int16_t Sub0_7[] = { - AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, - AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, - }; - - static const int16_t Sub0_7_64[] = { - AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, - AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, - }; - - static const int16_t Sub0_3[] = { - AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, - }; - - static const int16_t Sub0_3_64[] = { - AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, - }; - - static const int16_t Sub0_2[] = { - AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, - }; - - static const int16_t Sub0_1[] = { - AMDGPU::sub0, AMDGPU::sub1, - }; - - unsigned Opcode; - ArrayRef<int16_t> SubIndices; + if (RC == &AMDGPU::VGPR_32RegClass) { + assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) || + AMDGPU::SReg_32RegClass.contains(SrcReg)); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } - if (AMDGPU::SReg_32RegClass.contains(DestReg)) { + if (RC == &AMDGPU::SReg_32RegClass) { if (SrcReg == AMDGPU::SCC) { BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg) .addImm(-1) @@ 
-405,8 +371,9 @@ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); return; + } - } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) { + if (RC == &AMDGPU::SReg_64RegClass) { if (DestReg == AMDGPU::VCC) { if (AMDGPU::SReg_64RegClass.contains(SrcReg)) { BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC) @@ -426,68 +393,29 @@ BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); return; + } - } else if (DestReg == AMDGPU::SCC) { + if (DestReg == AMDGPU::SCC) { assert(AMDGPU::SReg_32RegClass.contains(SrcReg)); BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U32)) - .addReg(SrcReg, getKillRegState(KillSrc)) - .addImm(0); - return; - } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) { - assert(AMDGPU::SReg_128RegClass.contains(SrcReg)); - Opcode = AMDGPU::S_MOV_B64; - SubIndices = Sub0_3_64; - - } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) { - assert(AMDGPU::SReg_256RegClass.contains(SrcReg)); - Opcode = AMDGPU::S_MOV_B64; - SubIndices = Sub0_7_64; - - } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) { - assert(AMDGPU::SReg_512RegClass.contains(SrcReg)); - Opcode = AMDGPU::S_MOV_B64; - SubIndices = Sub0_15_64; - - } else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) { - assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) || - AMDGPU::SReg_32RegClass.contains(SrcReg)); - BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(0); return; + } - } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) { - assert(AMDGPU::VReg_64RegClass.contains(SrcReg) || - AMDGPU::SReg_64RegClass.contains(SrcReg)); - Opcode = AMDGPU::V_MOV_B32_e32; - SubIndices = Sub0_1; - - } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) { - assert(AMDGPU::VReg_96RegClass.contains(SrcReg)); - Opcode = AMDGPU::V_MOV_B32_e32; - SubIndices = Sub0_2; - - } else if 
(AMDGPU::VReg_128RegClass.contains(DestReg)) { - assert(AMDGPU::VReg_128RegClass.contains(SrcReg) || - AMDGPU::SReg_128RegClass.contains(SrcReg)); - Opcode = AMDGPU::V_MOV_B32_e32; - SubIndices = Sub0_3; - - } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) { - assert(AMDGPU::VReg_256RegClass.contains(SrcReg) || - AMDGPU::SReg_256RegClass.contains(SrcReg)); - Opcode = AMDGPU::V_MOV_B32_e32; - SubIndices = Sub0_7; - - } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) { - assert(AMDGPU::VReg_512RegClass.contains(SrcReg) || - AMDGPU::SReg_512RegClass.contains(SrcReg)); - Opcode = AMDGPU::V_MOV_B32_e32; - SubIndices = Sub0_15; - - } else { - llvm_unreachable("Can't copy register!"); + unsigned EltSize = 4; + unsigned Opcode = AMDGPU::V_MOV_B32_e32; + if (RI.isSGPRClass(RC)) { + if (RC->getSize() > 4) { + Opcode = AMDGPU::S_MOV_B64; + EltSize = 8; + } else { + Opcode = AMDGPU::S_MOV_B32; + EltSize = 4; + } } + ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize); bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg); for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) { Index: lib/Target/AMDGPU/SIRegisterInfo.h =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.h +++ lib/Target/AMDGPU/SIRegisterInfo.h @@ -251,6 +251,9 @@ /// unit requirement. 
unsigned getMaxNumVGPRs(const MachineFunction &MF) const; + ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC, + unsigned EltSize) const; + private: void buildSpillLoadStore(MachineBasicBlock::iterator MI, unsigned LoadStoreOp, const MachineOperand *SrcDst, Index: lib/Target/AMDGPU/SIRegisterInfo.cpp =================================================================== --- lib/Target/AMDGPU/SIRegisterInfo.cpp +++ lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1306,3 +1306,106 @@ return MaxNumVGPRs - getNumDebuggerReservedVGPRs(ST); } + +ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC, + unsigned EltSize) const { + if (EltSize == 4) { + static const int16_t Sub0_15[] = { + AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, + AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, + AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, + AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, + }; + + static const int16_t Sub0_7[] = { + AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, + AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, + }; + + static const int16_t Sub0_3[] = { + AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, + }; + + static const int16_t Sub0_2[] = { + AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, + }; + + static const int16_t Sub0_1[] = { + AMDGPU::sub0, AMDGPU::sub1, + }; + + switch (AMDGPU::getRegBitWidth(*RC->MC)) { + case 32: + return {}; + case 64: + return Sub0_1; + case 96: + return Sub0_2; + case 128: + return Sub0_3; + case 256: + return Sub0_7; + case 512: + return Sub0_15; + default: + llvm_unreachable("unhandled register size"); + } + } + + if (EltSize == 8) { + static const int16_t Sub0_15_64[] = { + AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, + AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, + AMDGPU::sub8_sub9, AMDGPU::sub10_sub11, + AMDGPU::sub12_sub13, AMDGPU::sub14_sub15 + }; + + static const int16_t Sub0_7_64[] = { + AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, + AMDGPU::sub4_sub5, AMDGPU::sub6_sub7 
+ }; + + + static const int16_t Sub0_3_64[] = { + AMDGPU::sub0_sub1, AMDGPU::sub2_sub3 + }; + + switch (AMDGPU::getRegBitWidth(*RC->MC)) { + case 64: + return {}; + case 128: + return Sub0_3_64; + case 256: + return Sub0_7_64; + case 512: + return Sub0_15_64; + default: + llvm_unreachable("unhandled register size"); + } + } + + assert(EltSize == 16 && "unhandled register spill split size"); + + static const int16_t Sub0_15_128[] = { + AMDGPU::sub0_sub1_sub2_sub3, + AMDGPU::sub4_sub5_sub6_sub7, + AMDGPU::sub8_sub9_sub10_sub11, + AMDGPU::sub12_sub13_sub14_sub15 + }; + + static const int16_t Sub0_7_128[] = { + AMDGPU::sub0_sub1_sub2_sub3, + AMDGPU::sub4_sub5_sub6_sub7 + }; + + switch (AMDGPU::getRegBitWidth(*RC->MC)) { + case 128: + return {}; + case 256: + return Sub0_7_128; + case 512: + return Sub0_15_128; + default: + llvm_unreachable("unhandled register size"); + } +}