Index: lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp +++ lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp @@ -198,14 +198,11 @@ BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB); } else { // Conditional branch. // Create a new transition block to hold the conditional branch. - BasicBlock *TransitionBB = BasicBlock::Create(F.getContext(), - "TransitionBlock", &F); + BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock"); - // Move BI from BB to the new transition block. - BI->removeFromParent(); - TransitionBB->getInstList().push_back(BI); - - // Create a branch that will always branch to the transition block. + // Create a branch that will always branch to the transition block and + // references DummyReturnBB. + BB->getTerminator()->eraseFromParent(); BranchInst::Create(TransitionBB, DummyReturnBB, BoolTrue, BB); } } Index: test/CodeGen/AMDGPU/update-phi.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/update-phi.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -mtriple=amdgcn-- -S -amdgpu-unify-divergent-exit-nodes -verify %s | FileCheck -check-prefix=IR %s + +; Make sure that the phi in n28 is updated when the block is split by unify +; divergent exit nodes. 
+
+define amdgpu_ps void @_amdgpu_ps_main() local_unnamed_addr #3 { ; NOTE(review): attribute group #3 is referenced here but never defined in this test - confirm it is intentional
+; IR-LABEL: @_amdgpu_ps_main(
+; IR-NEXT: .entry:
+; IR-NEXT: br label [[DOTLOOPEXIT:%.*]]
+; IR: .loopexit:
+; IR-NEXT: br label [[N28:%.*]]
+; IR: n28:
+; IR-NEXT: [[DOT01:%.*]] = phi float [ 0.000000e+00, [[DOTLOOPEXIT]] ], [ [[N29:%.*]], [[TRANSITIONBLOCK:%.*]] ]
+; IR-NEXT: [[N29]] = fadd float [[DOT01]], 1.000000e+00
+; IR-NEXT: [[N30:%.*]] = fcmp ogt float [[N29]], 4.000000e+00
+; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK]], label [[DUMMYRETURNBLOCK:%.*]]
+; IR: TransitionBlock:
+; IR-NEXT: br i1 [[N30]], label [[DOTLOOPEXIT]], label [[N28]]
+; IR: n31:
+; IR-NEXT: ret void
+; IR: DummyReturnBlock:
+; IR-NEXT: ret void
+;
+.entry:
+  br label %.loopexit
+
+.loopexit: ; preds = %n28, %.entry
+  br label %n28
+
+n28: ; preds = %.loopexit, %n28
+  %.01 = phi float [ 0.000000e+00, %.loopexit ], [ %n29, %n28 ] ; the %n28 self-edge is the incoming block the checks expect to be rewritten to TransitionBlock
+  %n29 = fadd float %.01, 1.0
+  %n30 = fcmp ogt float %n29, 4.000000e+00
+  br i1 %n30, label %.loopexit, label %n28 ; both targets stay in the loop; the checks expect this conditional branch to end up in TransitionBlock
+
+n31: ; No predecessors! Unreachable block that supplies the function's only 'ret void' in the input
+  ret void
+}