AMDGPU: add an SGPRSpill TSFlags bit and implement the
isLoadFromStackSlot / isStoreToStackSlot hooks in SIInstrInfo.

Spill pseudo operands are renamed so that getNamedOperand can find them:
SGPR spills use $data/$addr, VGPR spills use $vdata/$vaddr.

Review notes:
 * The hunks below are split one source line per diff line and each one
   matches the line counts in its @@ header. Indentation and comment
   alignment of the lines are reconstructed; apply with `patch -l` if the
   whitespace does not match the tree.
 * The ArrayRef return type (SIInstrInfo.h) and the SI_SPILL_SGPR
   parameter list (SIInstructions.td) context lines are spelled out in
   full. TODO confirm both against the tree.
 * The operand comments on the VGPR spill save say vdata/vaddr, the names
   that SI_SPILL_VGPR declares and that isStackAccess looks up.

Index: lib/Target/AMDGPU/SIDefines.h
===================================================================
--- lib/Target/AMDGPU/SIDefines.h
+++ lib/Target/AMDGPU/SIDefines.h
@@ -40,9 +40,10 @@
   FLAT = 1 << 21,
   WQM = 1 << 22,
   VGPRSpill = 1 << 23,
-  VOPAsmPrefer32Bit = 1 << 24,
-  Gather4 = 1 << 25,
-  DisableWQM = 1 << 26
+  SGPRSpill = 1 << 24,
+  VOPAsmPrefer32Bit = 1 << 25,
+  Gather4 = 1 << 26,
+  DisableWQM = 1 << 27
 };
 }
 
Index: lib/Target/AMDGPU/SIInstrFormats.td
===================================================================
--- lib/Target/AMDGPU/SIInstrFormats.td
+++ lib/Target/AMDGPU/SIInstrFormats.td
@@ -45,6 +45,7 @@
   // Whether WQM _must_ be enabled for this instruction.
   field bits<1> WQM = 0;
   field bits<1> VGPRSpill = 0;
+  field bits<1> SGPRSpill = 0;
 
   // This bit tells the assembler to use the 32-bit encoding in case it
   // is unable to infer the encoding from the operands.
@@ -84,9 +85,10 @@
   let TSFlags{21} = FLAT;
   let TSFlags{22} = WQM;
   let TSFlags{23} = VGPRSpill;
-  let TSFlags{24} = VOPAsmPrefer32Bit;
-  let TSFlags{25} = Gather4;
-  let TSFlags{26} = DisableWQM;
+  let TSFlags{24} = SGPRSpill;
+  let TSFlags{25} = VOPAsmPrefer32Bit;
+  let TSFlags{26} = Gather4;
+  let TSFlags{27} = DisableWQM;
 
   let SchedRW = [Write32Bit];
 
Index: lib/Target/AMDGPU/SIInstrInfo.h
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.h
+++ lib/Target/AMDGPU/SIInstrInfo.h
@@ -356,6 +356,14 @@
     return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
   }
 
+  static bool isSGPRSpill(const MachineInstr &MI) {
+    return MI.getDesc().TSFlags & SIInstrFlags::SGPRSpill;
+  }
+
+  bool isSGPRSpill(uint16_t Opcode) const {
+    return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill;
+  }
+
   static bool isDPP(const MachineInstr &MI) {
     return MI.getDesc().TSFlags & SIInstrFlags::DPP;
   }
@@ -541,6 +549,14 @@
     return get(pseudoToMCOpcode(Opcode));
   }
 
+  unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
+  unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;
+
+  unsigned isLoadFromStackSlot(const MachineInstr &MI,
+                               int &FrameIndex) const override;
+  unsigned isStoreToStackSlot(const MachineInstr &MI,
+                              int &FrameIndex) const override;
+
   unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
 
   ArrayRef<std::pair<int, const char *>>
Index: lib/Target/AMDGPU/SIInstrInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIInstrInfo.cpp
+++ lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -599,8 +599,8 @@
     }
 
     BuildMI(MBB, MI, DL, OpDesc)
-      .addReg(SrcReg, getKillRegState(isKill)) // src
-      .addFrameIndex(FrameIndex)               // frame_idx
+      .addReg(SrcReg, getKillRegState(isKill)) // data
+      .addFrameIndex(FrameIndex)               // addr
       .addMemOperand(MMO);
 
     return;
@@ -621,8 +621,8 @@
   unsigned Opcode = getVGPRSpillSaveOpcode(RC->getSize());
   MFI->setHasSpilledVGPRs();
   BuildMI(MBB, MI, DL, get(Opcode))
-    .addReg(SrcReg, getKillRegState(isKill)) // src
-    .addFrameIndex(FrameIndex)               // frame_idx
+    .addReg(SrcReg, getKillRegState(isKill)) // vdata
+    .addFrameIndex(FrameIndex)               // vaddr
     .addReg(MFI->getScratchRSrcReg())        // scratch_rsrc
     .addReg(MFI->getScratchWaveOffsetReg())  // scratch_offset
     .addImm(0)                               // offset
@@ -693,7 +693,7 @@
     }
 
     BuildMI(MBB, MI, DL, OpDesc, DestReg)
-      .addFrameIndex(FrameIndex) // frame_idx
+      .addFrameIndex(FrameIndex) // addr
       .addMemOperand(MMO);
 
     return;
@@ -712,7 +712,7 @@
 
   unsigned Opcode = getVGPRSpillRestoreOpcode(RC->getSize());
   BuildMI(MBB, MI, DL, get(Opcode), DestReg)
-    .addFrameIndex(FrameIndex)              // frame_idx
+    .addFrameIndex(FrameIndex)              // vaddr
     .addReg(MFI->getScratchRSrcReg())       // scratch_rsrc
     .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
     .addImm(0)                              // offset
@@ -3169,6 +3169,56 @@
   return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc);
 }
 
+unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI,
+                                    int &FrameIndex) const {
+  const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
+  if (!Addr || !Addr->isFI())
+    return AMDGPU::NoRegister;
+
+  assert(!MI.memoperands_empty() &&
+         (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS);
+
+  FrameIndex = Addr->getIndex();
+  return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
+}
+
+unsigned SIInstrInfo::isSGPRStackAccess(const MachineInstr &MI,
+                                        int &FrameIndex) const {
+  const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::addr);
+  assert(Addr && Addr->isFI());
+  FrameIndex = Addr->getIndex();
+  return getNamedOperand(MI, AMDGPU::OpName::data)->getReg();
+}
+
+unsigned SIInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
+                                          int &FrameIndex) const {
+
+  if (!MI.mayLoad())
+    return AMDGPU::NoRegister;
+
+  if (isMUBUF(MI) || isVGPRSpill(MI))
+    return isStackAccess(MI, FrameIndex);
+
+  if (isSGPRSpill(MI))
+    return isSGPRStackAccess(MI, FrameIndex);
+
+  return AMDGPU::NoRegister;
+}
+
+unsigned SIInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
+                                         int &FrameIndex) const {
+  if (!MI.mayStore())
+    return AMDGPU::NoRegister;
+
+  if (isMUBUF(MI) || isVGPRSpill(MI))
+    return isStackAccess(MI, FrameIndex);
+
+  if (isSGPRSpill(MI))
+    return isSGPRStackAccess(MI, FrameIndex);
+
+  return AMDGPU::NoRegister;
+}
+
 unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
   unsigned Opc = MI.getOpcode();
   const MCInstrDesc &Desc = getMCOpcodeFromPseudo(Opc);
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -1337,17 +1337,17 @@
 } // End Uses = [EXEC], Defs = [M0, EXEC]
 
 multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
-  let UseNamedOperandTable = 1, Uses = [EXEC] in {
+  let UseNamedOperandTable = 1, SGPRSpill = 1, Uses = [EXEC] in {
     def _SAVE : PseudoInstSI <
       (outs),
-      (ins sgpr_class:$src, i32imm:$frame_idx)> {
+      (ins sgpr_class:$data, i32imm:$addr)> {
       let mayStore = 1;
       let mayLoad = 0;
     }
 
     def _RESTORE : PseudoInstSI <
-      (outs sgpr_class:$dst),
-      (ins i32imm:$frame_idx)> {
+      (outs sgpr_class:$data),
+      (ins i32imm:$addr)> {
       let mayStore = 0;
       let mayLoad = 1;
     }
@@ -1369,7 +1369,7 @@
        SchedRW = [WriteVMEM] in {
     def _SAVE : VPseudoInstSI <
       (outs),
-      (ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
+      (ins vgpr_class:$vdata, i32imm:$vaddr, SReg_128:$scratch_rsrc,
            SReg_32:$scratch_offset, i32imm:$offset)> {
       let mayStore = 1;
       let mayLoad = 0;
@@ -1378,8 +1378,8 @@
     }
 
     def _RESTORE : VPseudoInstSI <
-      (outs vgpr_class:$dst),
-      (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset,
+      (outs vgpr_class:$vdata),
+      (ins i32imm:$vaddr, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset,
            i32imm:$offset)> {
       let mayStore = 0;
       let mayLoad = 1;
Index: lib/Target/AMDGPU/SIRegisterInfo.cpp
===================================================================
--- lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -599,7 +599,7 @@
     case AMDGPU::SI_SPILL_V64_SAVE:
     case AMDGPU::SI_SPILL_V32_SAVE:
       buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
-            TII->getNamedOperand(*MI, AMDGPU::OpName::src),
+            TII->getNamedOperand(*MI, AMDGPU::OpName::vdata),
             TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
             TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
             FrameInfo.getObjectOffset(Index) +
@@ -614,7 +614,7 @@
     case AMDGPU::SI_SPILL_V256_RESTORE:
     case AMDGPU::SI_SPILL_V512_RESTORE: {
       buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
-            TII->getNamedOperand(*MI, AMDGPU::OpName::dst),
+            TII->getNamedOperand(*MI, AMDGPU::OpName::vdata),
             TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
             TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
             FrameInfo.getObjectOffset(Index) +