diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -316,39 +316,22 @@ } if (isMUBUF(LdSt) || isMTBUF(LdSt)) { - const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset); - if (SOffset && SOffset->isReg()) { - // We can only handle this if it's a stack access, as any other resource - // would require reporting multiple base registers. - const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); - if (AddrReg && !AddrReg->isFI()) - return false; - - const MachineOperand *RSrc = getNamedOperand(LdSt, AMDGPU::OpName::srsrc); - const SIMachineFunctionInfo *MFI - = LdSt.getParent()->getParent()->getInfo(); - if (RSrc->getReg() != MFI->getScratchRSrcReg()) - return false; - - const MachineOperand *OffsetImm = - getNamedOperand(LdSt, AMDGPU::OpName::offset); - BaseOps.push_back(RSrc); - BaseOps.push_back(SOffset); - Offset = OffsetImm->getImm(); - } else { - BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::srsrc); - if (!BaseOp) // e.g. BUFFER_WBINVL1_VOL - return false; + const MachineOperand *RSrc = getNamedOperand(LdSt, AMDGPU::OpName::srsrc); + if (!RSrc) // e.g. BUFFER_WBINVL1_VOL + return false; + BaseOps.push_back(RSrc); + BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); + if (BaseOp && !BaseOp->isFI()) BaseOps.push_back(BaseOp); - - BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr); - if (BaseOp) - BaseOps.push_back(BaseOp); - - const MachineOperand *OffsetImm = - getNamedOperand(LdSt, AMDGPU::OpName::offset); - Offset = OffsetImm->getImm(); - if (SOffset) // soffset can be an inline immediate. + const MachineOperand *OffsetImm = + getNamedOperand(LdSt, AMDGPU::OpName::offset); + Offset = OffsetImm->getImm(); + const MachineOperand *SOffset = + getNamedOperand(LdSt, AMDGPU::OpName::soffset); + if (SOffset) { + if (SOffset->isReg()) + BaseOps.push_back(SOffset); + else Offset += SOffset->getImm(); } // Get appropriate operand, and compute width accordingly. diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll --- a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll +++ b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll @@ -149,7 +149,7 @@ ; GCN-NEXT: - .registers: ; SDAG-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}} ; SDAG-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}} -; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}} +; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ce{{$}} ; GISEL-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}} ; GCN-NEXT: .shader_functions: ; GCN-NEXT: dynamic_stack: diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll --- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll @@ -739,8 +739,8 @@ ; GCN-LABEL: {{^}}tail_call_byval_align16: ; GCN-NOT: s32 -; GCN: buffer_load_dword [[VREG1:v[0-9]+]], off, s[0:3], s32 offset:8 ; GCN: buffer_load_dword [[VREG2:v[0-9]+]], off, s[0:3], s32 offset:12 +; GCN: buffer_load_dword [[VREG1:v[0-9]+]], off, s[0:3], s32 offset:8 ; GCN: s_getpc_b64 diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll @@ -3294,14 +3294,15 @@ ; GFX9-LABEL: tail_call_byval_align16: ; GFX9: ; %bb.0: ; %entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 -; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:12 +; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:12 +; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8 ; GFX9-NEXT: s_getpc_b64 s[4:5] ; GFX9-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4 ; GFX9-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4 -; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 +; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 +; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 ; GFX9-NEXT: s_setpc_b64 s[4:5] ; ; GFX10-LABEL: tail_call_byval_align16: diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.tbuffer.load.ll @@ -39,8 +39,8 @@ } ; GCN-LABEL: {{^}}tbuffer_load_immoffs_large -; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] offset:4095 ; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_SSCALED] offset:73 +; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] offset:4095 ; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] offset:1 ; GFX10-DAG: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_FMT_10_10_10_2_SSCALED] offset:4095 ; GFX10-DAG: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_FMT_32_32_UINT] offset:73 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.tbuffer.load.ll @@ -42,8 +42,8 @@ ; GCN-LABEL: {{^}}tbuffer_load_immoffs_large ; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0 -; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] idxen offset:4095 ; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_SSCALED] idxen offset:73 +; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] idxen offset:4095 ; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:1 ; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_FMT_10_10_10_2_SSCALED] idxen offset:4095 ; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_FMT_32_32_UINT] idxen offset:73 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tbuffer.load.ll @@ -33,8 +33,8 @@ } ; GCN-LABEL: {{^}}tbuffer_load_immoffs_large -; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] offset:4095 ; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_SSCALED] offset:73 +; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] offset:4095 ; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] offset:1 ; GCN: s_waitcnt define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>} @tbuffer_load_immoffs_large(<4 x i32> inreg, i32 inreg %soffs) { diff --git a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll --- a/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll +++ b/llvm/test/CodeGen/AMDGPU/salu-to-valu.ll @@ -173,9 +173,9 @@ ; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x9a40{{$}} ; CI-NOHSA-DAG: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x9a50{{$}} ; CI-NOHSA-NOT: v_add +; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16 ; CI-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}} ; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}} -; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16 ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} ; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}} @@ -203,10 +203,10 @@ ; GCN-LABEL: {{^}}smrd_valu_ci_offset_x16: ; SI: s_mov_b32 {{s[0-9]+}}, 0x13480 -; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16 ; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:32 -; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], {{s[0-9]+}} addr64 ; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:48 +; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16 +; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], {{s[0-9]+}} addr64 ; CI-NOHSA-DAG: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}} ; CI-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}} ; CI-NOHSA-DAG: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}}