Index: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -4375,8 +4375,12 @@ Register Reg = Root.getReg(); const SIMachineFunctionInfo *Info = MF->getInfo(); - const MachineInstr *Def = MRI->getVRegDef(Reg); - if (Register WaveBase = getWaveAddress(Def)) { + std::optional Def = + getDefSrcRegIgnoringCopies(Reg, *MRI); + assert(Def && "this shouldn't be an optional result"); + Reg = Def->Reg; + + if (Register WaveBase = getWaveAddress(Def->MI)) { return {{ [=](MachineInstrBuilder &MIB) { // rsrc MIB.addReg(Info->getScratchRSrcReg()); @@ -4392,10 +4396,12 @@ // FIXME: Copy check is a hack Register BasePtr; - if (mi_match(Reg, *MRI, m_GPtrAdd(m_Reg(BasePtr), m_Copy(m_ICst(Offset))))) { + if (mi_match(Reg, *MRI, + m_GPtrAdd(m_Reg(BasePtr), + m_any_of(m_ICst(Offset), m_Copy(m_ICst(Offset)))))) { if (!SIInstrInfo::isLegalMUBUFImmOffset(Offset)) return {}; - const MachineInstr *BasePtrDef = MRI->getVRegDef(BasePtr); + MachineInstr *BasePtrDef = getDefIgnoringCopies(BasePtr, *MRI); Register WaveBase = getWaveAddress(BasePtrDef); if (!WaveBase) return {}; Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -3846,9 +3846,8 @@ // This case is weird because we expect a physical register in the source, // but need to set a bank anyway. // - // We could select the result to SGPR or VGPR, but for the one current use - // it's more practical to always use VGPR. - OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, 32); + // TODO: We could select the result to SGPR or VGPR + OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 32); break; } Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir @@ -781,16 +781,14 @@ ; GFX6: liveins: $vgpr0, $vgpr1 ; GFX6-NEXT: {{ $}} ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX6-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec - ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec - ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_LSHRREV_B32_e64_]], [[V_MOV_B32_e32_]], 0, implicit $exec - ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; ; GFX9-LABEL: name: function_store_private_s32_to_4_wave_address_offset_4095 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec - ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[V_LSHRREV_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; ; GFX11-LABEL: name: function_store_private_s32_to_4_wave_address_offset_4095 ; GFX11: liveins: $vgpr0, $vgpr1 ; GFX11-NEXT: {{ $}} @@ -830,6 +828,7 @@ ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]] ; GFX6-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_LSHRREV_B32_e64_]], [[COPY1]], 0, implicit $exec ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[V_ADD_CO_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5) + ; ; GFX9-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4096 ; GFX9: liveins: $vgpr0, $vgpr1 ; GFX9-NEXT: {{ $}} Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgpu-wave-address.mir @@ -9,7 +9,7 @@ body: | bb.0: ; CHECK-LABEL: name: amdgpu_wave_address - ; CHECK: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 + ; CHECK: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:sgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 ; CHECK-NEXT: S_ENDPGM 0, implicit [[AMDGPU_WAVE_ADDRESS]](p5) %0:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 S_ENDPGM 0, implicit %0 @@ -23,9 +23,10 @@ bb.0: ; CHECK-LABEL: name: amdgpu_wave_address_v ; CHECK: [[DEF:%[0-9]+]]:sgpr(p1) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) - ; CHECK-NEXT: G_STORE [[AMDGPU_WAVE_ADDRESS]](p5), [[COPY]](p1) :: (store (p5), addrspace 1) + ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:sgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(p5) = COPY [[AMDGPU_WAVE_ADDRESS]](p5) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(p1) = COPY [[DEF]](p1) + ; CHECK-NEXT: G_STORE [[COPY]](p5), [[COPY1]](p1) :: (store (p5), addrspace 1) %0:_(p1) = G_IMPLICIT_DEF %1:_(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32 G_STORE %1, %0 :: (store (p5), addrspace 1)