diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp --- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp @@ -392,7 +392,12 @@ const MachineMemOperand *B) { unsigned MinOffset = std::min(A->getOffset(), B->getOffset()); unsigned Size = A->getSize() + B->getSize(); - return MF.getMachineMemOperand(A, MinOffset, Size); + // This function adds the offset parameter to the existing offset for A, + // so we pass 0 here as the offset and then manually set it to the correct + // value after the call. + MachineMemOperand *MMO = MF.getMachineMemOperand(A, 0, Size); + MMO->setOffset(MinOffset); + return MMO; } bool SILoadStoreOptimizer::offsetsCanBeCombined(CombineInfo &CI, bool Modify) { diff --git a/llvm/test/CodeGen/AMDGPU/merge-load-store.mir b/llvm/test/CodeGen/AMDGPU/merge-load-store.mir --- a/llvm/test/CodeGen/AMDGPU/merge-load-store.mir +++ b/llvm/test/CodeGen/AMDGPU/merge-load-store.mir @@ -65,7 +65,8 @@ attributes #0 = { convergent nounwind } - define amdgpu_kernel void @merge_mmos() { ret void } + define amdgpu_kernel void @merge_mmos(i32 addrspace(1)* %ptr_addr1) { ret void } + ... --- name: mem_dependency @@ -170,6 +171,8 @@ # CHECK: S_BUFFER_LOAD_DWORDX2_IMM %0, 0, 0, 0 :: (dereferenceable invariant load 8, align 4) # CHECK: BUFFER_LOAD_DWORDX2_OFFSET %0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 4) # CHECK: BUFFER_STORE_DWORDX2_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 4) +# CHECK: BUFFER_LOAD_DWORDX2_OFFSET %0, 0, 64, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from %ir.ptr_addr1 + 64, align 4 +# CHECK: BUFFER_STORE_DWORDX2_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 64, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into %ir.ptr_addr1 + 64, align 4 name: merge_mmos tracksRegLiveness: true body: | @@ -183,6 +186,11 @@ %4:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 4, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4) BUFFER_STORE_DWORD_OFFSET_exact %3, %0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4) BUFFER_STORE_DWORD_OFFSET_exact %4, %0, 0, 4, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4) + %5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 64, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from %ir.ptr_addr1 + 64) + %6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 68, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from %ir.ptr_addr1 + 68) + BUFFER_STORE_DWORD_OFFSET_exact %5, %0, 0, 64, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.ptr_addr1 + 64) + BUFFER_STORE_DWORD_OFFSET_exact %6, %0, 0, 68, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.ptr_addr1 + 68) + S_ENDPGM 0 ...