Index: llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -509,8 +509,25 @@
     BuildMI(MBB, InsPt, DL, TII->get(Opcode), Exec)
     .addReg(Exec)
     .add(MI.getOperand(0));
-  if (LV)
-    LV->replaceKillInstruction(MI.getOperand(0).getReg(), MI, *NewMI);
+  if (LV) {
+    LV->replaceKillInstruction(DataReg, MI, *NewMI);
+
+    if (SplitBB != &MBB) {
+      for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+        Register Reg = Register::index2VirtReg(i);
+        LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
+
+        if (VI.AliveBlocks.test(MBB.getNumber()))
+          VI.AliveBlocks.set(SplitBB->getNumber());
+        else if (LV->isPHIJoin(Reg)) {
+          // Pass through live phis
+          for (MachineInstr *Kill : VI.Kills)
+            if (Kill->getParent() == SplitBB)
+              VI.AliveBlocks.set(MBB.getNumber());
+        }
+      }
+    }
+  }
 
   LoweredEndCf.insert(NewMI);
 
Index: llvm/test/CodeGen/AMDGPU/lower-control-flow-live-variables-update.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/lower-control-flow-live-variables-update.mir
@@ -0,0 +1,87 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -start-before=livevars -stop-after=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck %s
+
+# FIXME: update_mir_test_checks tries to incorrectly re-use a variable
+# name used for a copy, so some of the check variable names were
+# manually fixed.
+
+# Check for LiveVariables verifier error after lowering SI_END_CF
+
+---
+name: phi_visit_order
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: phi_visit_order
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
+  ; CHECK-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 0, killed [[COPY]], implicit $exec
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_MOV_B32_e32_]]
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed [[V_MOV_B32_e32_]]
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
+  ; CHECK-NEXT:   [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY3]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+  ; CHECK-NEXT:   [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY3]], implicit-def dead $scc
+  ; CHECK-NEXT:   $exec = S_MOV_B64_term killed [[S_AND_B64_]]
+  ; CHECK-NEXT:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[S_XOR_B64_]], implicit $exec
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.1, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.3(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY killed [[S_MOV_B64_term]]
+  ; CHECK-NEXT:   $exec = S_OR_B64_term $exec, killed [[COPY4]], implicit-def $scc
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed [[COPY1]]
+  ; CHECK-NEXT:   [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = nsw V_ADD_U32_e32 1, killed [[COPY5]], implicit $exec
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY killed [[V_ADD_U32_e32_]]
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY66:%[0-9]+]]:vgpr_32 = COPY killed [[COPY6]]
+  ; CHECK-NEXT:   GLOBAL_STORE_DWORD undef %10:vreg_64, [[COPY66]], 0, 0, implicit $exec :: (volatile store (s32), addrspace 1)
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed [[COPY66]]
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY killed [[COPY7]]
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
+  ; CHECK-NEXT:   [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY8]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+  ; CHECK-NEXT:   [[S_XOR_B64_1:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_1]], [[COPY8]], implicit-def dead $scc
+  ; CHECK-NEXT:   $exec = S_MOV_B64_term killed [[S_AND_B64_1]]
+  ; CHECK-NEXT:   [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term killed [[S_XOR_B64_1]], implicit $exec
+  ; CHECK-NEXT:   S_CBRANCH_EXECZ %bb.1, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.2
+  bb.0:
+    successors: %bb.2(0x40000000), %bb.1(0x40000000)
+    liveins: $vgpr0
+
+    %0:vgpr_32 = COPY killed $vgpr0
+    %1:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    %2:sreg_64_xexec = V_CMP_EQ_U32_e64 0, killed %0, implicit $exec
+    %3:sreg_64_xexec = SI_IF %2, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+    %4:sreg_64_xexec = PHI %5, %bb.2, %3, %bb.0
+    %6:vgpr_32 = PHI %7, %bb.2, %1, %bb.0
+    SI_END_CF killed %4, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    %8:vgpr_32 = nsw V_ADD_U32_e32 1, killed %6, implicit $exec
+
+  bb.2:
+    successors: %bb.2(0x40000000), %bb.1(0x40000000)
+
+    %9:vgpr_32 = PHI %8, %bb.1, %7, %bb.2, %1, %bb.0
+    GLOBAL_STORE_DWORD undef %10:vreg_64, %9, 0, 0, implicit $exec :: (volatile store (s32), addrspace 1)
+    %7:vgpr_32 = COPY killed %9
+    %5:sreg_64_xexec = SI_IF %2, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.2
+
+...
Index: llvm/test/CodeGen/AMDGPU/phi-visit-order.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/phi-visit-order.ll
@@ -0,0 +1,23 @@
+; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs < %s
+
+define amdgpu_kernel void @phi_visit_order() {
+bb:
+  br label %bb1
+
+bb1:
+  %tmp = phi i32 [ 0, %bb ], [ %tmp5, %bb4 ]
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %cnd = icmp eq i32 %tid, 0
+  br i1 %cnd, label %bb4, label %bb2
+
+bb2:
+  %tmp3 = add nsw i32 %tmp, 1
+  br label %bb4
+
+bb4:
+  %tmp5 = phi i32 [ %tmp3, %bb2 ], [ %tmp, %bb1 ]
+  store volatile i32 %tmp5, i32 addrspace(1)* undef
+  br label %bb1
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0