diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -1215,12 +1215,21 @@ LLVM_READONLY int getVCMPXNoSDstOp(uint16_t Opcode); + /// \returns ST form with only immediate offset of a FLAT Scratch instruction + /// given an \p Opcode of an SS (SADDR) form. LLVM_READONLY int getFlatScratchInstSTfromSS(uint16_t Opcode); + /// \returns SS (SADDR) form of a FLAT Scratch instruction given an \p Opcode + /// of an SV (VADDR) form. LLVM_READONLY int getFlatScratchInstSSfromSV(uint16_t Opcode); + /// \returns SV (VADDR) form of a FLAT Scratch instruction given an \p Opcode + /// of an SS (SADDR) form, or a negative value if there is none. + LLVM_READONLY + int getFlatScratchInstSVfromSS(uint16_t Opcode); + const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL; const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19); const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21); diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5021,6 +5021,8 @@ assert(isSegmentSpecificFLAT(Inst)); int NewOpc = AMDGPU::getGlobalVaddrOp(Opc); + if (NewOpc < 0) + NewOpc = AMDGPU::getFlatScratchInstSVfromSS(Opc); if (NewOpc < 0) return false; @@ -5034,14 +5036,17 @@ return false; int OldVAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr); - assert(OldVAddrIdx >= 0); - // Check vaddr, it shall be zero - MachineOperand &VAddr = Inst.getOperand(OldVAddrIdx); - MachineInstr *VAddrDef = MRI.getUniqueVRegDef(VAddr.getReg()); - if (!VAddrDef || VAddrDef->getOpcode() != AMDGPU::V_MOV_B32_e32 || - !VAddrDef->getOperand(1).isImm() || VAddrDef->getOperand(1).getImm() != 0) - return false; + // Check vaddr: it must be absent or defined by a V_MOV_B32_e32 of 0. 
+ MachineInstr *VAddrDef = nullptr; + if (OldVAddrIdx >= 0) { + MachineOperand &VAddr = Inst.getOperand(OldVAddrIdx); + VAddrDef = MRI.getUniqueVRegDef(VAddr.getReg()); + if (!VAddrDef || VAddrDef->getOpcode() != AMDGPU::V_MOV_B32_e32 || + !VAddrDef->getOperand(1).isImm() || + VAddrDef->getOperand(1).getImm() != 0) + return false; + } const MCInstrDesc &NewDesc = get(NewOpc); Inst.setDesc(NewDesc); @@ -5060,10 +5065,12 @@ MRI.addRegOperandToUseList(&NewVAddr); } else { assert(OldSAddrIdx == NewVAddrIdx); - Inst.RemoveOperand(OldVAddrIdx); + + if (OldVAddrIdx >= 0) + Inst.RemoveOperand(OldVAddrIdx); } - if (MRI.use_nodbg_empty(VAddrDef->getOperand(0).getReg())) + if (VAddrDef && MRI.use_nodbg_empty(VAddrDef->getOperand(0).getReg())) VAddrDef->eraseFromParent(); return true; diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2549,6 +2549,14 @@ let ValueCols = [["SS"]]; } +def getFlatScratchInstSVfromSS : InstrMapping { + let FilterClass = "FlatScratchInst"; + let RowFields = ["SVOp"]; + let ColFields = ["Mode"]; + let KeyCol = ["SS"]; + let ValueCols = [["SV"]]; +} + include "SIInstructions.td" include "DSInstructions.td" diff --git a/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir b/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir --- a/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir +++ b/llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir @@ -350,8 +350,7 @@ ; GCN: bb.1: ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, %6, %bb.1 - ; GCN: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]], implicit $exec - ; GCN: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR [[V_READFIRSTLANE_B32_]], 0, 0, implicit $exec, implicit $flat_scr + ; GCN: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[PHI]], 0, 0, implicit 
$exec, implicit $flat_scr ; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PHI]], 1, implicit $exec ; GCN: V_CMP_NE_U32_e32 0, [[V_AND_B32_e64_]], implicit-def $vcc, implicit $exec ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_AND_B32_e64_]], implicit $exec @@ -387,8 +386,7 @@ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, %6, %bb.1 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]], implicit $exec - ; GCN: SCRATCH_STORE_DWORD_SADDR [[DEF]], [[V_READFIRSTLANE_B32_]], 0, 0, implicit $exec, implicit $flat_scr + ; GCN: SCRATCH_STORE_DWORD [[DEF]], [[PHI]], 0, 0, implicit $exec, implicit $flat_scr ; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PHI]], 1, implicit $exec ; GCN: V_CMP_NE_U32_e32 0, [[V_AND_B32_e64_]], implicit-def $vcc, implicit $exec ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_AND_B32_e64_]], implicit $exec