Index: llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
+++ llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
@@ -61,8 +61,17 @@
 
 struct ConstantMatch {
   int64_t &CR;
-  ConstantMatch(int64_t &C) : CR(C) {}
+  bool LookThroughCopy;
+  ConstantMatch(int64_t &C, bool LT) : CR(C), LookThroughCopy(LT) {}
   bool match(const MachineRegisterInfo &MRI, Register Reg) {
+    if (LookThroughCopy) {
+      // Step over at most one COPY, and only onto a virtual register: the
+      // single-definition lookups used here are not valid for physregs.
+      const MachineInstr *Def = MRI.getVRegDef(Reg);
+      if (Def && Def->getOpcode() == TargetOpcode::COPY &&
+          Def->getOperand(1).getReg().isVirtual())
+        Reg = Def->getOperand(1).getReg();
+    }
     if (auto MaybeCst = getConstantVRegSExtVal(Reg, MRI)) {
       CR = *MaybeCst;
       return true;
@@ -71,7 +80,10 @@
   }
 };
 
-inline ConstantMatch m_ICst(int64_t &Cst) { return ConstantMatch(Cst); }
+/// Match an integer constant, optionally looking through a single COPY.
+inline ConstantMatch m_ICst(int64_t &Cst, bool LookThroughCopy = false) {
+  return ConstantMatch(Cst, LookThroughCopy);
+}
 
 struct ICstRegMatch {
   Register &CR;
Index: llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
@@ -33,11 +33,7 @@
   int64_t Offset;
   if (Def->getOpcode() == TargetOpcode::G_ADD) {
     // TODO: Handle G_OR used for add case
-    if (mi_match(Def->getOperand(2).getReg(), MRI, m_ICst(Offset)))
-      return std::make_pair(Def->getOperand(1).getReg(), Offset);
-
-    // FIXME: matcher should ignore copies
-    if (mi_match(Def->getOperand(2).getReg(), MRI, m_Copy(m_ICst(Offset))))
+    if (mi_match(Def->getOperand(2).getReg(), MRI, m_ICst(Offset, true)))
       return std::make_pair(Def->getOperand(1).getReg(), Offset);
   }
 
@@ -45,7 +41,7 @@
   if (Def->getOpcode() == TargetOpcode::G_PTRTOINT) {
     MachineInstr *Base;
     if (mi_match(Def->getOperand(1).getReg(), MRI,
-                 m_GPtrAdd(m_MInstr(Base), m_ICst(Offset)))) {
+                 m_GPtrAdd(m_MInstr(Base), m_ICst(Offset, true)))) {
       // If Base was int converted to pointer, simply return int and offset.
       if (Base->getOpcode() == TargetOpcode::G_INTTOPTR)
         return std::make_pair(Base->getOperand(1).getReg(), Offset);
Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1356,6 +1356,13 @@
   std::tie(Base, Offset) =
       AMDGPU::getBaseWithConstantOffset(*MRI, CombinedOffset);
 
+  // If Base is a pointer, convert it to int.
+  if (MRI->getType(Base).isPointer()) {
+    const RegisterBank *BaseBank = RBI.getRegBank(Base, *MRI, *RBI.TRI);
+    Base = B.buildPtrToInt(MRI->getType(CombinedOffset), Base).getReg(0);
+    MRI->setRegBank(Base, *BaseBank);
+  }
+
   uint32_t SOffset, ImmOffset;
   if ((int)Offset > 0 && AMDGPU::splitMUBUFOffset(Offset, SOffset, ImmOffset,
                                                   &RBI.Subtarget, Alignment)) {
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/merge-s-buffer-loads.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/merge-s-buffer-loads.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs -o - %s | FileCheck %s
+
+declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32 immarg)
+declare void @llvm.amdgcn.exp.i32(i32 immarg, i32 immarg, i32, i32, i32, i32, i1, i1)
+
+define amdgpu_cs void @test1(i32 %index, <4 x i32> inreg %desc, <4 x i32> addrspace(6)* inreg %array) {
+; CHECK-LABEL: test1:
+; CHECK:       ; %bb.0: ; %.entry
+; CHECK-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, s4, v0
+; CHECK-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    exp mrt0 off, off, off, off
+; CHECK-NEXT:    s_endpgm
+.entry:
+  %ep11 = getelementptr <4 x i32>, <4 x i32> addrspace(6)* %array, i32 %index, i32 0
+  %ei11 = ptrtoint i32 addrspace(6)* %ep11 to i32
+  %el11 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %ei11, i32 0)
+
+  %ep12 = getelementptr <4 x i32>, <4 x i32> addrspace(6)* %array, i32 %index, i32 1
+  %ei12 = ptrtoint i32 addrspace(6)* %ep12 to i32
+  %el12 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %ei12, i32 0)
+
+  %ep13 = getelementptr <4 x i32>, <4 x i32> addrspace(6)* %array, i32 %index, i32 2
+  %ei13 = ptrtoint i32 addrspace(6)* %ep13 to i32
+  %el13 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %ei13, i32 0)
+
+  %ep14 = getelementptr <4 x i32>, <4 x i32> addrspace(6)* %array, i32 %index, i32 3
+  %ei14 = ptrtoint i32 addrspace(6)* %ep14 to i32
+  %el14 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 %ei14, i32 0)
+
+  call void @llvm.amdgcn.exp.i32(i32 0, i32 0, i32 %el11, i32 %el12, i32 %el13, i32 %el14, i1 false, i1 false)
+
+  ret void
+}
+
+define amdgpu_cs void @test2(i32 %index, <4 x i32> addrspace(6)* inreg %array) {
+; CHECK-LABEL: test2:
+; CHECK:       ; %bb.0: ; %.entry
+; CHECK-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
+; CHECK-NEXT:    v_add_i32_e32 v0, vcc, s0, v0
+; CHECK-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    exp mrt0 off, off, off, off
+; CHECK-NEXT:    s_endpgm
+.entry:
+  %ep11 = getelementptr <4 x i32>, <4 x i32> addrspace(6)* %array, i32 %index, i32 0
+  %ei11 = ptrtoint i32 addrspace(6)* %ep11 to i32
+  %el11 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %ei11, i32 0)
+
+  %ep12 = getelementptr <4 x i32>, <4 x i32> addrspace(6)* %array, i32 %index, i32 1
+  %ei12 = ptrtoint i32 addrspace(6)* %ep12 to i32
+  %el12 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %ei12, i32 0)
+
+  %ep13 = getelementptr <4 x i32>, <4 x i32> addrspace(6)* %array, i32 %index, i32 2
+  %ei13 = ptrtoint i32 addrspace(6)* %ep13 to i32
+  %el13 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %ei13, i32 0)
+
+  %ep14 = getelementptr <4 x i32>, <4 x i32> addrspace(6)* %array, i32 %index, i32 3
+  %ei14 = ptrtoint i32 addrspace(6)* %ep14 to i32
+  %el14 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> undef, i32 %ei14, i32 0)
+
+  call void @llvm.amdgcn.exp.i32(i32 0, i32 0, i32 %el11, i32 %el12, i32 %el13, i32 %el14, i1 false, i1 false)
+
+  ret void
+}
Index: llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp
===================================================================
--- llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp
+++ llvm/unittests/CodeGen/GlobalISel/PatternMatchTest.cpp
@@ -40,6 +40,18 @@
   EXPECT_EQ(Cst, 42);
 }
 
+TEST_F(AArch64GISelMITest, MatchIntConstantWithLookThrough) {
+  setUp();
+  if (!TM)
+    return;
+  auto MIBCst = B.buildConstant(LLT::scalar(64), 42);
+  auto MIBCpy = B.buildCopy(LLT::scalar(64), MIBCst);
+  int64_t Cst;
+  bool match = mi_match(MIBCpy.getReg(0), *MRI, m_ICst(Cst, true));
+  EXPECT_TRUE(match);
+  EXPECT_EQ(Cst, 42);
+}
+
 TEST_F(AArch64GISelMITest, MatchIntConstantRegister) {
   setUp();
   if (!TM)