// Patch summary. This text sits ahead of the first "Index:" header and is not
// part of any hunk (NOTE(review): confirm your patch tool skips leading text).
// - SIInstrInfo.td / SIInstrInfo.h: add and declare the
//   getFlatScratchInstSVfromSS InstrMapping (FilterClass FlatScratchInst,
//   key column SS, value column SV).
// - SIInstrInfo.cpp: when getGlobalVaddrOp(Opc) yields no opcode, fall back to
//   getFlatScratchInstSVfromSS(Opc). The old opcode may have no vaddr operand,
//   so the zero check and RemoveOperand(OldVAddrIdx) run only when
//   OldVAddrIdx >= 0.
// - move-load-addr-to-valu.mir: the CHECK lines now expect SCRATCH_LOAD_DWORD /
//   SCRATCH_STORE_DWORD addressed directly by the PHI VGPR instead of a
//   V_READFIRSTLANE_B32 feeding the *_SADDR forms.
Index: llvm/lib/Target/AMDGPU/SIInstrInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -1217,6 +1217,9 @@ LLVM_READONLY int getFlatScratchInstSSfromSV(uint16_t Opcode); + LLVM_READONLY + int getFlatScratchInstSVfromSS(uint16_t Opcode); + const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL; const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19); const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21); Index: llvm/lib/Target/AMDGPU/SIInstrInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5021,6 +5021,8 @@ assert(isSegmentSpecificFLAT(Inst)); int NewOpc = AMDGPU::getGlobalVaddrOp(Opc); + if (NewOpc < 0) + NewOpc = AMDGPU::getFlatScratchInstSVfromSS(Opc); if (NewOpc < 0) return false; @@ -5034,14 +5036,15 @@ return false; int OldVAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr); - assert(OldVAddrIdx >= 0); - // Check vaddr, it shall be zero - MachineOperand &VAddr = Inst.getOperand(OldVAddrIdx); - MachineInstr *Def = MRI.getUniqueVRegDef(VAddr.getReg()); - if (!Def || Def->getOpcode() != AMDGPU::V_MOV_B32_e32 || - !Def->getOperand(1).isImm() || Def->getOperand(1).getImm() != 0) - return false; + // The old vaddr, if present, must be a constant zero so it can be dropped.
+ if (OldVAddrIdx >= 0) { + MachineOperand &VAddr = Inst.getOperand(OldVAddrIdx); + MachineInstr *Def = MRI.getUniqueVRegDef(VAddr.getReg()); + if (!Def || Def->getOpcode() != AMDGPU::V_MOV_B32_e32 || + !Def->getOperand(1).isImm() || Def->getOperand(1).getImm() != 0) + return false; + } const MCInstrDesc &NewDesc = get(NewOpc); Inst.setDesc(NewDesc); @@ -5052,11 +5055,14 @@ MRI.removeRegOperandFromUseList(&NewVAddr); MRI.moveOperands(&NewVAddr, &SAddr, 1); Inst.RemoveOperand(OldSAddrIdx); - } else { - assert(OldSAddrIdx == NewVAddrIdx); - Inst.RemoveOperand(OldVAddrIdx); + return true; } + assert(OldSAddrIdx == NewVAddrIdx); + + if (OldVAddrIdx >= 0) + Inst.RemoveOperand(OldVAddrIdx); + return true; } Index: llvm/lib/Target/AMDGPU/SIInstrInfo.td =================================================================== --- llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2549,6 +2549,14 @@ let ValueCols = [["SS"]]; } +def getFlatScratchInstSVfromSS : InstrMapping { + let FilterClass = "FlatScratchInst"; + let RowFields = ["SVOp"]; + let ColFields = ["Mode"]; + let KeyCol = ["SS"]; + let ValueCols = [["SV"]]; +} + include "SIInstructions.td" include "DSInstructions.td" Index: llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir +++ llvm/test/CodeGen/AMDGPU/move-load-addr-to-valu.mir @@ -352,8 +352,7 @@ ; GCN: bb.1: ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, %6, %bb.1 - ; GCN: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]], implicit $exec - ; GCN: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR [[V_READFIRSTLANE_B32_]], 0, 0, implicit $exec, implicit $flat_scr + ; GCN: [[SCRATCH_LOAD_DWORD:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD [[PHI]], 0, 0, implicit $exec, implicit $flat_scr ; GCN: 
[[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PHI]], 1, implicit $exec ; GCN: V_CMP_NE_U32_e32 0, [[V_AND_B32_e64_]], implicit-def $vcc, implicit $exec ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_AND_B32_e64_]], implicit $exec @@ -389,8 +388,7 @@ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, %6, %bb.1 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; GCN: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]], implicit $exec - ; GCN: SCRATCH_STORE_DWORD_SADDR [[DEF]], [[V_READFIRSTLANE_B32_]], 0, 0, implicit $exec, implicit $flat_scr + ; GCN: SCRATCH_STORE_DWORD [[DEF]], [[PHI]], 0, 0, implicit $exec, implicit $flat_scr ; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PHI]], 1, implicit $exec ; GCN: V_CMP_NE_U32_e32 0, [[V_AND_B32_e64_]], implicit-def $vcc, implicit $exec ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_AND_B32_e64_]], implicit $exec