Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1321,6 +1321,8 @@
   return Def->getOperand(0).getReg();
 }
 
+static bool signBitNotSet(unsigned Val) { return (Val & 0x80000000) == 0; }
+
 // Analyze a combined offset from an llvm.amdgcn.s.buffer intrinsic and store
 // the three offsets (voffset, soffset and instoffset)
 static unsigned setBufferOffsets(MachineIRBuilder &B,
@@ -1352,8 +1354,9 @@
       AMDGPU::getBaseWithConstantOffset(*MRI, CombinedOffset);
 
   uint32_t SOffset, ImmOffset;
-  if (Offset > 0 && AMDGPU::splitMUBUFOffset(Offset, SOffset, ImmOffset,
-                                             &RBI.Subtarget, Alignment)) {
+  if (Offset != 0 && signBitNotSet(Offset) &&
+      AMDGPU::splitMUBUFOffset(Offset, SOffset, ImmOffset, &RBI.Subtarget,
+                               Alignment)) {
     if (RBI.getRegBank(Base, *MRI, *RBI.TRI) == &AMDGPU::VGPRRegBank) {
       VOffsetReg = Base;
       SOffsetReg = B.buildConstant(S32, SOffset).getReg(0);
@@ -1373,7 +1376,8 @@
   }
 
   // Handle the variable sgpr + vgpr case.
-  if (MachineInstr *Add = getOpcodeDef(AMDGPU::G_ADD, CombinedOffset, *MRI)) {
+  MachineInstr *Add = getOpcodeDef(AMDGPU::G_ADD, CombinedOffset, *MRI);
+  if (Add && signBitNotSet(Offset)) {
     Register Src0 = getSrcRegIgnoringCopies(*MRI, Add->getOperand(1).getReg());
     Register Src1 = getSrcRegIgnoringCopies(*MRI, Add->getOperand(2).getReg());
 
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-amdgcn.s.buffer.load.mir
@@ -62,8 +62,9 @@
     ; FAST: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60
     ; FAST: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
     ; FAST: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
-    ; FAST: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; FAST: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C1]](s32), [[COPY1]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4)
+    ; FAST: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; FAST: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; FAST: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 4)
     ; FAST: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32)
     ; GREEDY-LABEL: name: s_buffer_load_negative_offset
     ; GREEDY: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $vgpr0
@@ -72,8 +73,9 @@
     ; GREEDY: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 -60
    ; GREEDY: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32)
    ; GREEDY: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY1]], [[COPY2]]
-    ; GREEDY: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
-    ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C1]](s32), [[COPY1]], [[C]], 0, 0, 0 :: (dereferenceable invariant load 4)
+    ; GREEDY: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0
+    ; GREEDY: [[C2:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0
+    ; GREEDY: [[AMDGPU_BUFFER_LOAD:%[0-9]+]]:vgpr(s32) = G_AMDGPU_BUFFER_LOAD [[COPY]](<4 x s32>), [[C2]](s32), [[ADD]], [[C1]], 0, 0, 0 :: (dereferenceable invariant load 4)
    ; GREEDY: S_ENDPGM 0, implicit [[AMDGPU_BUFFER_LOAD]](s32)
    %0:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
    %1:_(s32) = COPY $vgpr0