diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -618,8 +618,8 @@ } } - RS.enterBasicBlock(MBB); - RS.forward(MI); + RS.enterBasicBlockEnd(MBB); + RS.backward(MI); // Ideally we want to have three registers for a long reg_sequence copy // to hide 2 waitstates between v_mov_b32 and accvgpr_write.