Index: llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp @@ -44,14 +44,18 @@ // Handle G_PTRTOINT (G_PTR_ADD base, const) case if (Def->getOpcode() == TargetOpcode::G_PTRTOINT) { MachineInstr *Base; - if (mi_match(Def->getOperand(1).getReg(), MRI, - m_GPtrAdd(m_MInstr(Base), m_ICst(Offset)))) { - // If Base was int converted to pointer, simply return int and offset. - if (Base->getOpcode() == TargetOpcode::G_INTTOPTR) - return std::make_pair(Base->getOperand(1).getReg(), Offset); + Register OffsetReg; + if (mi_match(Def->getOperand(1).getReg(), MRI, + m_GPtrAdd(m_MInstr(Base), m_Reg(OffsetReg)))) { + if (mi_match(OffsetReg, MRI, m_ICst(Offset)) || + mi_match(OffsetReg, MRI, m_Copy(m_ICst(Offset)))) { + // If Base was int converted to pointer, simply return int and offset. + if (Base->getOpcode() == TargetOpcode::G_INTTOPTR) + return std::make_pair(Base->getOperand(1).getReg(), Offset); - // Register returned here will be of pointer type. - return std::make_pair(Base->getOperand(0).getReg(), Offset); + // Register returned here will be of pointer type. + return std::make_pair(Base->getOperand(0).getReg(), Offset); + } } } Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -1329,6 +1329,13 @@ std::tie(Base, Offset) = AMDGPU::getBaseWithConstantOffset(*MRI, CombinedOffset); + // If Base is a pointer, convert it to int. 
+ if (MRI->getType(Base).isPointer()) { + const RegisterBank *BaseBank = RBI.getRegBank(Base, *MRI, *RBI.TRI); + Base = B.buildPtrToInt(MRI->getType(CombinedOffset), Base).getReg(0); + MRI->setRegBank(Base, *BaseBank); + } + uint32_t SOffset, ImmOffset; if (Offset > 0 && AMDGPU::splitMUBUFOffset(Offset, SOffset, ImmOffset, &RBI.Subtarget, Alignment)) { Index: llvm/test/CodeGen/AMDGPU/GlobalISel/merge-s-buffer-loads.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/merge-s-buffer-loads.ll @@ -0,0 +1,67 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -o - %s | FileCheck %s + +declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg) +declare void @llvm.amdgcn.exp.i32(i32 immarg, i32 immarg, i32, i32, i32, i32, i1, i1) + +define amdgpu_cs void @test1(i32 %index, <4 x i32> inreg %desc, <4 x i32> addrspace(6)* inreg %array) { +; CHECK-LABEL: test1: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: v_lshlrev_b32_e32 v0, 4, v0 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, s4, v0 +; CHECK-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: exp mrt0 off, off, off, off +; CHECK-NEXT: s_endpgm +.entry: + %ep11 = getelementptr <4 x i32>, <4 x i32> addrspace(6)* %array, i32 %index, i32 0 + %ei11 = ptrtoint i32 addrspace(6)* %ep11 to i32 + %el11 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %ei11, i32 0) + + %ep12 = getelementptr <4 x i32>, <4 x i32> addrspace(6)* %array, i32 %index, i32 1 + %ei12 = ptrtoint i32 addrspace(6)* %ep12 to i32 + %el12 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %ei12, i32 0) + + %ep13 = getelementptr <4 x i32>, <4 x i32> addrspace(6)* %array, i32 %index, i32 2 + %ei13 = ptrtoint i32 addrspace(6)* %ep13 to i32 + %el13 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %ei13, i32 0) + + 
%ep14 = getelementptr <4 x i32>, <4 x i32> addrspace(6)* %array, i32 %index, i32 3 + %ei14 = ptrtoint i32 addrspace(6)* %ep14 to i32 + %el14 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %ei14, i32 0) + + call void @llvm.amdgcn.exp.i32(i32 0, i32 0, i32 %el11, i32 %el12, i32 %el13, i32 %el14, i1 false, i1 false) + + ret void +} + +define amdgpu_cs void @test2(i32 %index, <4 x i32> addrspace(6)* inreg %array) { +; CHECK-LABEL: test2: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: v_lshlrev_b32_e32 v0, 4, v0 +; CHECK-NEXT: v_add_i32_e32 v0, vcc, s0, v0 +; CHECK-NEXT: buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen +; CHECK-NEXT: s_waitcnt vmcnt(0) +; CHECK-NEXT: exp mrt0 off, off, off, off +; CHECK-NEXT: s_endpgm +.entry: + %ep11 = getelementptr <4 x i32>, <4 x i32> addrspace(6)* %array, i32 %index, i32 0 + %ei11 = ptrtoint i32 addrspace(6)* %ep11 to i32 + %el11 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %ei11, i32 0) + + %ep12 = getelementptr <4 x i32>, <4 x i32> addrspace(6)* %array, i32 %index, i32 1 + %ei12 = ptrtoint i32 addrspace(6)* %ep12 to i32 + %el12 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %ei12, i32 0) + + %ep13 = getelementptr <4 x i32>, <4 x i32> addrspace(6)* %array, i32 %index, i32 2 + %ei13 = ptrtoint i32 addrspace(6)* %ep13 to i32 + %el13 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %ei13, i32 0) + + %ep14 = getelementptr <4 x i32>, <4 x i32> addrspace(6)* %array, i32 %index, i32 3 + %ei14 = ptrtoint i32 addrspace(6)* %ep14 to i32 + %el14 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %ei14, i32 0) + + call void @llvm.amdgcn.exp.i32(i32 0, i32 0, i32 %el11, i32 %el12, i32 %el13, i32 %el14, i1 false, i1 false) + + ret void +}