Index: llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
+++ llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.h
@@ -20,7 +20,7 @@
 namespace AMDGPU {
 
 /// Returns base register and constant offset.
-std::pair<Register, unsigned>
+std::pair<Register, int64_t>
 getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg);
 
 bool isLegalVOP3PShuffleMask(ArrayRef<int> Mask);
Index: llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUGlobalISelUtils.cpp
@@ -13,14 +13,14 @@
 using namespace llvm;
 using namespace MIPatternMatch;
 
-std::pair<Register, unsigned>
+std::pair<Register, int64_t>
 AMDGPU::getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg) {
   MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
   if (!Def)
     return std::make_pair(Reg, 0);
 
   if (Def->getOpcode() == TargetOpcode::G_CONSTANT) {
-    unsigned Offset;
+    int64_t Offset;
     const MachineOperand &Op = Def->getOperand(1);
     if (Op.isImm())
       Offset = Op.getImm();
Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1346,7 +1346,7 @@
   }
 
   Register Base;
-  unsigned Offset;
+  int64_t Offset;
   std::tie(Base, Offset) =
       AMDGPU::getBaseWithConstantOffset(*MRI, CombinedOffset);
 
@@ -1373,7 +1373,8 @@
   }
 
   // Handle the variable sgpr + vgpr case.
-  if (MachineInstr *Add = getOpcodeDef(AMDGPU::G_ADD, CombinedOffset, *MRI)) {
+  MachineInstr *Add = getOpcodeDef(AMDGPU::G_ADD, CombinedOffset, *MRI);
+  if (Add && Offset >= 0) {
     Register Src0 = getSrcRegIgnoringCopies(*MRI, Add->getOperand(1).getReg());
     Register Src1 = getSrcRegIgnoringCopies(*MRI, Add->getOperand(2).getReg());
 
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir
@@ -46,3 +46,42 @@
     S_ENDPGM 0, implicit %4
 
 ...
+
+---
+name: s_buffer_load_negative_offset
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
+
+    ; FAST-LABEL: name: s_buffer_load_negative_offset
+    ; FAST: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
+    ; FAST: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; FAST: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60
+    ; FAST: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; FAST: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
+    ; FAST: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; FAST: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; FAST: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 4)
+    ; FAST: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32)
+    ; GREEDY-LABEL: name: s_buffer_load_negative_offset
+    ; GREEDY: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
+    ; GREEDY: [[COPY:%[0-9]+]]:sgpr(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    ; GREEDY: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
+    ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60
+    ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
+    ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
+    ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 4)
+    ; GREEDY: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32)
+    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+    %1:_(s32) = COPY $vgpr0
+    %2:_(s32) = G_CONSTANT i32 -60
+    %3:_(s32) = G_ADD %1, %2
+    %4:_(s32) = G_AMDGPU_S_BUFFER_LOAD %0, %3, 0
+    S_ENDPGM 0, implicit %4
+
+...