Index: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp @@ -5983,11 +5983,18 @@ if (C1) { unsigned ImmOffset = C1->getZExtValue(); // If the immediate value is too big for the immoffset field, put the value - // mod 4096 into the immoffset field so that the value that is copied/added + // and -4096 into the immoffset field so that the value that is copied/added // for the voffset field is a multiple of 4096, and it stands more chance // of being CSEd with the copy/add for another similar load/store. + // However, do not do that rounding down to a multiple of 4096 if that is a + // negative number, as it appears to be illegal to have a negative offset + // in the vgpr, even if adding the immediate offset makes it positive. unsigned Overflow = ImmOffset & ~MaxImm; ImmOffset -= Overflow; + if ((int32_t)Overflow < 0) { + Overflow += ImmOffset; + ImmOffset = 0; + } C1 = cast<ConstantSDNode>(DAG.getConstant(ImmOffset, DL, MVT::i32)); if (Overflow) { auto OverflowVal = DAG.getConstant(Overflow, DL, MVT::i32); Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.raw.buffer.load.ll @@ -74,8 +74,8 @@ } ;CHECK-LABEL: {{^}}buffer_load_negative_offset: -;CHECK: v_add_{{[iu]}}32_e32 [[VOFS:v[0-9]+]], vcc, 0xfffff000, v0 -;CHECK: buffer_load_dwordx4 v[0:3], [[VOFS]], s[0:3], 0 offen offset:4080 +;CHECK: v_add_{{[iu]}}32_e32 [[VOFS:v[0-9]+]], vcc, -16, v0 +;CHECK: buffer_load_dwordx4 v[0:3], [[VOFS]], s[0:3], 0 offen define amdgpu_ps <4 x float> @buffer_load_negative_offset(<4 x i32> inreg, i32 %ofs) { main_body: %ofs.1 = add i32 %ofs, -16 Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll 
=================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.ll @@ -102,8 +102,8 @@ } ;CHECK-LABEL: {{^}}buffer_load_negative_offset: -;CHECK: v_add_{{[iu]}}32_e32 {{v[0-9]+}}, vcc, 0xfffff000, v0 -;CHECK: buffer_load_dwordx4 v[0:3], {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 0 idxen offen offset:4080 +;CHECK: v_add_{{[iu]}}32_e32 {{v[0-9]+}}, vcc, -16, v0 +;CHECK: buffer_load_dwordx4 v[0:3], {{v\[[0-9]+:[0-9]+\]}}, s[0:3], 0 idxen offen define amdgpu_ps <4 x float> @buffer_load_negative_offset(<4 x i32> inreg, i32 %ofs) { main_body: %ofs.1 = add i32 %ofs, -16