Index: lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp =================================================================== --- lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -142,9 +142,10 @@ DEBUG(dbgs() << "Removing no effect instruction: " << *I << '\n'); - for (auto &Op : I->operands()) + for (auto &Op : I->operands()) { if (Op.isReg()) RecalcRegs.insert(Op.getReg()); + } auto Next = std::next(I); LIS->RemoveMachineInstrFromMaps(*I); @@ -193,6 +194,11 @@ auto SaveExec = getOrExecSource(*Lead, *TII, MRI); unsigned SaveExecReg = getOrNonExecReg(*Lead, *TII); + for (auto &Op : Lead->operands()) { + if (Op.isReg()) + RecalcRegs.insert(Op.getReg()); + } + LIS->RemoveMachineInstrFromMaps(*Lead); Lead->eraseFromParent(); if (SaveExecReg) { Index: test/CodeGen/AMDGPU/collapse-endcf.ll =================================================================== --- test/CodeGen/AMDGPU/collapse-endcf.ll +++ test/CodeGen/AMDGPU/collapse-endcf.ll @@ -202,8 +202,68 @@ ret void } +; Make sure scc liveness is updated if s_or_b64 is removed +; GCN-LABEL: {{^}}scc_liveness: + +; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]: +; GCN: s_andn2_b64 exec, exec, +; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]] + +; GCN: buffer_load_dword v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen +; GCN: s_and_b64 exec, exec, vcc + +; GCN-NOT: s_or_b64 exec, exec + +; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}} +; GCN: s_andn2_b64 +; GCN-NEXT: s_cbranch_execnz + +; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}} +; GCN: buffer_store_dword +; GCN: buffer_store_dword +; GCN: buffer_store_dword +; GCN: buffer_store_dword +; GCN: s_setpc_b64 +define void @scc_liveness(i32 %arg) local_unnamed_addr #0 { +bb: + br label %bb1 + +bb1: ; preds = %Flow1, %bb1, %bb + %tmp = icmp slt i32 %arg, 519 + br i1 %tmp, label %bb2, label %bb1 + +bb2: ; preds = %bb1 + %tmp3 = icmp eq i32 %arg, 0 + br i1 %tmp3, label %bb4, label %bb10 + +bb4: ; preds = %bb2 + 
%tmp6 = load float, float* undef + %tmp7 = fcmp olt float %tmp6, 0.0 + br i1 %tmp7, label %bb8, label %Flow + +bb8: ; preds = %bb4 + %tmp9 = insertelement <4 x float> undef, float 0.0, i32 1 + br label %Flow + +Flow: ; preds = %bb8, %bb4 + %tmp8 = phi <4 x float> [ %tmp9, %bb8 ], [ zeroinitializer, %bb4 ] + br label %bb10 + +bb10: ; preds = %Flow, %bb2 + %tmp11 = phi <4 x float> [ zeroinitializer, %bb2 ], [ %tmp8, %Flow ] + br i1 %tmp3, label %bb12, label %Flow1 + +Flow1: ; preds = %bb10 + br label %bb1 + +bb12: ; preds = %bb10 + store volatile <4 x float> %tmp11, <4 x float>* undef, align 16 + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() #0 declare void @llvm.amdgcn.s.barrier() #1 attributes #0 = { nounwind readnone speculatable } attributes #1 = { nounwind convergent } +attributes #2 = { nounwind }