diff --git a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp --- a/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp @@ -313,8 +313,15 @@ Value *Exec = popSaved(); Instruction *FirstInsertionPt = &*BB->getFirstInsertionPt(); - if (!isa(Exec) && !isa(FirstInsertionPt)) + if (!isa(Exec) && !isa(FirstInsertionPt)) { + Instruction *ExecDef = dyn_cast(Exec); + BasicBlock *DefBB = ExecDef->getParent(); + if (!DT->dominates(DefBB, BB)) { + // Split edge to make Def dominate Use + FirstInsertionPt = &*SplitEdge(DefBB, BB, DT, LI)->getFirstInsertionPt(); + } CallInst::Create(EndCf, Exec, "", FirstInsertionPt); + } } /// Annotate the control flow with intrinsics so the backend can @@ -327,7 +334,6 @@ const TargetMachine &TM = TPC.getTM(); initialize(*F.getParent(), TM.getSubtarget(F)); - for (df_iterator I = df_begin(&F.getEntryBlock()), E = df_end(&F.getEntryBlock()); I != E; ++I) { BasicBlock *BB = *I;