Index: lib/Target/AMDGPU/SIRegisterInfo.h
===================================================================
--- lib/Target/AMDGPU/SIRegisterInfo.h
+++ lib/Target/AMDGPU/SIRegisterInfo.h
@@ -240,11 +240,11 @@
   unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
 
 private:
-  void buildScratchLoadStore(MachineBasicBlock::iterator MI,
-                             unsigned LoadStoreOp, const MachineOperand *SrcDst,
-                             unsigned ScratchRsrcReg, unsigned ScratchOffset,
-                             int64_t Offset,
-                             RegScavenger *RS) const;
+  void buildSpillLoadStore(MachineBasicBlock::iterator MI,
+                           unsigned LoadStoreOp, const MachineOperand *SrcDst,
+                           unsigned ScratchRsrcReg, unsigned ScratchOffset,
+                           int64_t Offset,
+                           RegScavenger *RS) const;
 };
 
 } // End namespace llvm

Index: lib/Target/AMDGPU/SIRegisterInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -320,14 +320,82 @@
   }
 }
 
-void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
-                                           unsigned LoadStoreOp,
-                                           const MachineOperand *SrcDst,
-                                           unsigned ScratchRsrcReg,
-                                           unsigned ScratchOffset,
-                                           int64_t Offset,
-                                           RegScavenger *RS) const {
+static int getOffsetMUBUFStore(unsigned Opc) {
+  switch (Opc) {
+  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
+    return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
+  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
+    return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
+  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
+    return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
+  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
+    return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
+  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
+    return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
+  default:
+    return -1;
+  }
+}
+
+static int getOffsetMUBUFLoad(unsigned Opc) {
+  switch (Opc) {
+  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
+    return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
+  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
+    return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
+  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
+    return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
+  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
+    return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
+  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
+    return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
+  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
+    return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
+  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
+    return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
+  default:
+    return -1;
+  }
+}
+
+// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
+// need to handle the case where an SGPR may need to be spilled while spilling.
+static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
+                                      MachineFrameInfo &MFI,
+                                      MachineBasicBlock::iterator MI,
+                                      int Index,
+                                      int64_t Offset) {
+  MachineBasicBlock *MBB = MI->getParent();
+  const DebugLoc &DL = MI->getDebugLoc();
+  bool IsStore = MI->mayStore();
+
+  unsigned Opc = MI->getOpcode();
+  int LoadStoreOp = IsStore ?
+    getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
+  if (LoadStoreOp == -1)
+    return false;
+
+  unsigned Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata)->getReg();
+
+  BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
+    .addReg(Reg, getDefRegState(!IsStore))
+    .addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
+    .addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
+    .addImm(Offset)
+    .addImm(0) // glc
+    .addImm(0) // slc
+    .addImm(0) // tfe
+    .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+  return true;
+}
+
+void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
+                                         unsigned LoadStoreOp,
+                                         const MachineOperand *SrcDst,
+                                         unsigned ScratchRsrcReg,
+                                         unsigned ScratchOffset,
+                                         int64_t Offset,
+                                         RegScavenger *RS) const {
   unsigned Value = SrcDst->getReg();
   bool IsKill = SrcDst->isKill();
   MachineBasicBlock *MBB = MI->getParent();
@@ -404,44 +472,6 @@
   }
 }
 
-static int getOffsetMUBUFStore(unsigned Opc) {
-  switch (Opc) {
-  case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
-    return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
-  case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
-    return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
-  case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
-    return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
-  case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
-    return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
-  case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
-    return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
-  default:
-    return -1;
-  }
-}
-
-static int getOffsetMUBUFLoad(unsigned Opc) {
-  switch (Opc) {
-  case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
-    return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
-  case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
-    return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
-  case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
-    return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
-  case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
-    return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
-  case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
-    return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
-  case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
-    return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
-  case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
-    return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
-  default:
-    return -1;
-  }
-}
-
 void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
                                          int SPAdj, unsigned FIOperandNum,
                                          RegScavenger *RS) const {
@@ -612,7 +642,7 @@
   case AMDGPU::SI_SPILL_V96_SAVE:
   case AMDGPU::SI_SPILL_V64_SAVE:
   case AMDGPU::SI_SPILL_V32_SAVE:
-    buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
+    buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
           TII->getNamedOperand(*MI, AMDGPU::OpName::vdata),
           TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
           TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
@@ -627,7 +657,7 @@
   case AMDGPU::SI_SPILL_V128_RESTORE:
   case AMDGPU::SI_SPILL_V256_RESTORE:
   case AMDGPU::SI_SPILL_V512_RESTORE: {
-    buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
+    buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
          TII->getNamedOperand(*MI, AMDGPU::OpName::vdata),
          TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
          TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
@@ -640,55 +670,19 @@
   default: {
     if (TII->isMUBUF(*MI)) {
       // Disable offen so we don't need a 0 vgpr base.
-      assert(FIOperandNum ==
+      assert(static_cast<int>(FIOperandNum) ==
              AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::vaddr));
-      const DebugLoc &DL = MI->getDebugLoc();
-      if (MI->mayLoad()) {
-        int OffenToOffset = getOffsetMUBUFLoad(MI->getOpcode());
-        if (OffenToOffset != -1) {
-          int64_t Offset = FrameInfo.getObjectOffset(Index);
-
-          int64_t OldImm = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
-          int64_t NewOffset = OldImm + Offset;
-
-          if (isUInt<12>(NewOffset)) {
-            unsigned DstReg
-              = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata)->getReg();
-            BuildMI(*MBB, MI, DL, TII->get(OffenToOffset), DstReg)
-              .addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
-              .addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
-              .addImm(NewOffset) // offset
-              .addImm(0) // glc
-              .addImm(0) // slc
-              .addImm(0); // tfe
-            MI->eraseFromParent();
-            break;
-          }
-        }
-      } else if (MI->mayStore()) {
-        int OffenToOffset = getOffsetMUBUFStore(MI->getOpcode());
-        if (OffenToOffset != -1) {
-          int64_t Offset = FrameInfo.getObjectOffset(Index);
-
-          int64_t OldImm = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
-          int64_t NewOffset = OldImm + Offset;
-
-          if (isUInt<12>(NewOffset)) {
-            // Disable offen so we don't need a 0 vgpr base
-            BuildMI(*MBB, MI, DL, TII->get(OffenToOffset))
-              .addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::vdata))
-              .addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
-              .addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
-              .addImm(NewOffset) // offset
-              .addImm(0) // glc
-              .addImm(0) // slc
-              .addImm(0); // tfe
-            MI->eraseFromParent();
-            break;
-          }
-        }
+      int64_t Offset = FrameInfo.getObjectOffset(Index);
+      int64_t OldImm
+        = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
+      int64_t NewOffset = OldImm + Offset;
+
+      if (isUInt<12>(NewOffset) &&
+          buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
+        MI->eraseFromParent();
+        break;
       }
     }
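
Note (reviewer illustration, not part of the patch): both the old inline
rewrites and the new buildMUBUFOffsetLoadStore helper rely on the same guard.
The frame-object offset may be folded into the instruction's immediate, and
the _OFFEN opcode swapped for its _OFFSET form, only while the combined value
still fits MUBUF's unsigned 12-bit offset field (the isUInt<12> check). A
minimal standalone C++ sketch of that check follows; canFoldMUBUFOffset is a
hypothetical name for illustration, not an LLVM API:

    #include <cassert>
    #include <cstdint>

    // MUBUF's immediate offset is an unsigned 12-bit field, so a folded
    // offset is only legal in [0, 4096). Mirrors llvm::isUInt<12> applied
    // to the sum of the instruction immediate and the frame-object offset.
    static bool canFoldMUBUFOffset(int64_t InstImm, int64_t FrameObjOffset) {
      int64_t NewOffset = InstImm + FrameObjOffset;
      return NewOffset >= 0 && NewOffset < (int64_t(1) << 12);
    }

    int main() {
      assert(canFoldMUBUFOffset(16, 4064));   // 4080 fits; use _OFFSET form
      assert(!canFoldMUBUFOffset(256, 4000)); // 4256 does not; keep _OFFEN
      return 0;
    }

When the check fails (or the opcode has no _OFFSET counterpart and the helper
returns false), the code falls through to the generic frame-index handling
below the shown hunk rather than erasing the instruction.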