diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -1841,7 +1841,7 @@ for (auto MI = MBB.begin(); MI != MBB.end(); ++MI) { // Unbundle instructions after the post-RA scheduler. - if (MI->isBundle()) { + if (MI->isBundle() && MI->mayLoadOrStore()) { MachineBasicBlock::instr_iterator II(MI->getIterator()); for (MachineBasicBlock::instr_iterator I = ++II, E = MBB.instr_end(); I != E && I->isBundledWithPred(); ++I) { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.i128.ll @@ -26,8 +26,8 @@ ; GFX9-NEXT: global_load_dwordx4 v[14:17], v[0:1], off offset:48 ; GFX9-NEXT: s_lshl_b32 s0, s2, 1 ; GFX9-NEXT: s_lshl_b32 s2, s0, 1 -; GFX9-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) ; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_set_gpr_idx_on s2, gpr_idx(SRC0) ; GFX9-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-NEXT: v_mov_b32_e32 v1, v3 ; GFX9-NEXT: v_mov_b32_e32 v18, v2