Index: llvm/lib/CodeGen/GlobalISel/Localizer.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/Localizer.cpp +++ llvm/lib/CodeGen/GlobalISel/Localizer.cpp @@ -124,7 +124,7 @@ LocalizedInstrs.insert(LocalizedMI); MachineInstr &UseMI = *MOUse.getParent(); if (MRI->hasOneUse(Reg) && !UseMI.isPHI()) - InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(UseMI), LocalizedMI); + InsertMBB->insert(UseMI, LocalizedMI); else InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()), LocalizedMI); @@ -173,9 +173,10 @@ while (II != MBB.end() && !Users.count(&*II)) ++II; + assert(II != MBB.end() && "Didn't find the user in the MBB"); LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *&*II << "\n"); - assert(II != MBB.end() && "Didn't find the user in the MBB"); + MI->removeFromParent(); MBB.insert(II, MI); Changed = true; Index: llvm/test/CodeGen/AMDGPU/GlobalISel/localizer-wrong-insert-point.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/localizer-wrong-insert-point.mir @@ -0,0 +1,32 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs -run-pass=localizer -o - %s | FileCheck %s + +# Previously this was placing the new G_CONSTANT after the use call +--- +name: wrong_user_insert_pt +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: wrong_user_insert_pt + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 + ; CHECK-NEXT: G_BR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 + ; CHECK-NEXT: $sgpr30_sgpr31 = G_SI_CALL [[C1]](p0), 0, csr_amdgpu_highregs + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $scc + ; CHECK-NEXT: S_SETPC_B64_return undef $sgpr30_sgpr31 + bb.0: + %0:_(p0) = G_CONSTANT i64 0 + G_BR %bb.1 + + bb.1: + ADJCALLSTACKUP 0, 0, implicit-def $scc + $sgpr30_sgpr31 = G_SI_CALL %0, 0, csr_amdgpu_highregs + ADJCALLSTACKDOWN 0, 0, implicit-def $scc + S_SETPC_B64_return undef $sgpr30_sgpr31 + +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll @@ -224,3 +224,39 @@ bb2: ret void } + +; This would crash from using the wrong insert point +define void @sink_null_insert_pt(i32 addrspace(4)* %arg0) { +; GFX9-LABEL: sink_null_insert_pt: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_or_saveexec_b64 s[16:17], -1 +; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill +; GFX9-NEXT: s_mov_b64 exec, s[16:17] +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-NEXT: global_load_dword v0, v[0:1], off glc +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_writelane_b32 v40, s33, 2 +; GFX9-NEXT: v_writelane_b32 v40, s30, 0 +; GFX9-NEXT: s_mov_b32 s33, s32 +; GFX9-NEXT: s_addk_i32 s32, 0x400 +; GFX9-NEXT: v_writelane_b32 v40, s31, 1 +; GFX9-NEXT: s_swappc_b64 s[30:31], 0 +; GFX9-NEXT: v_readlane_b32 s4, v40, 0 +; GFX9-NEXT: v_readlane_b32 s5, v40, 1 +; GFX9-NEXT: s_addk_i32 s32, 0xfc00 +; GFX9-NEXT: v_readlane_b32 s33, v40, 2 +; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1 +; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload +; GFX9-NEXT: s_mov_b64 exec, s[6:7] +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[4:5] +entry: + %load0 = load volatile i32, i32 addrspace(1)* null, align 4 + br label %bb1 + +bb1: + call void null() + ret void +}