diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp --- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp @@ -1247,6 +1247,19 @@ FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header); FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header); + // Change the condition of FC0 latch branch to true, as both successors of + // the branch are the same. + BranchInst *FC0LatchBranch = + dyn_cast(FC0.Latch->getTerminator()); + if (FC0LatchBranch) { + assert(FC0LatchBranch->isConditional() && + FC0LatchBranch->getSuccessor(0) == + FC0LatchBranch->getSuccessor(1) && + "Expecting the two successors of FC0LatchBranch to be the same"); + FC0LatchBranch->setCondition(llvm::ConstantInt::getTrue( + FC0LatchBranch->getCondition()->getType())); + } + // If FC0.Latch and FC0.ExitingBlock are the same then we have already // performed the updates above. if (FC0.Latch != FC0.ExitingBlock) @@ -1292,6 +1305,22 @@ // Delete the now empty loop L1. LI.erase(FC1.L); + // Move instructions from FC0.Latch to FC1.Latch bottom up. + for (auto It = ++FC0.Latch->rbegin(); It != FC0.Latch->rend();) { + Instruction *MovePos = FC1.Latch->getFirstNonPHIOrDbg(); + Instruction &I = *It; + ++It; + + if (isSafeToMoveBefore(I, *MovePos, DT, PDT, DI)) + I.moveBefore(MovePos); + else + break; + } + BasicBlock *Succ = FC0.Latch->getUniqueSuccessor(); + assert(Succ && "Expecting unique successor"); + MergeBlockIntoPredecessor(Succ, &DTU, &LI); + DTU.flush(); + #ifndef NDEBUG assert(!verifyFunction(*FC0.Header->getParent(), &errs())); assert(DT.verify(DominatorTree::VerificationLevel::Fast)); @@ -1491,6 +1520,19 @@ FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header); FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header); + // Change the condition of FC0 latch branch to true, as both successors of + // the branch are the same. + BranchInst *FC0LatchBranch = + dyn_cast(FC0.Latch->getTerminator()); + if (FC0LatchBranch) { + assert(FC0LatchBranch->isConditional() && + FC0LatchBranch->getSuccessor(0) == + FC0LatchBranch->getSuccessor(1) && + "Expecting the two successors of FC0LatchBranch to be the same"); + FC0LatchBranch->setCondition(llvm::ConstantInt::getTrue( + FC0LatchBranch->getCondition()->getType())); + } + // If FC0.Latch and FC0.ExitingBlock are the same then we have already // performed the updates above. if (FC0.Latch != FC0.ExitingBlock) @@ -1545,6 +1587,22 @@ // Delete the now empty loop L1. LI.erase(FC1.L); + // Move instructions from FC0.Latch to FC1.Latch bottom up. + for (auto It = ++FC0.Latch->rbegin(); It != FC0.Latch->rend();) { + Instruction *MovePos = FC1.Latch->getFirstNonPHIOrDbg(); + Instruction &I = *It; + ++It; + + if (isSafeToMoveBefore(I, *MovePos, DT, PDT, DI)) + I.moveBefore(MovePos); + else + break; + } + BasicBlock *Succ = FC0.Latch->getUniqueSuccessor(); + assert(Succ && "Expecting unique successor"); + MergeBlockIntoPredecessor(Succ, &DTU, &LI); + DTU.flush(); + #ifndef NDEBUG assert(!verifyFunction(*FC0.Header->getParent(), &errs())); assert(DT.verify(DominatorTree::VerificationLevel::Fast)); diff --git a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp --- a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp +++ b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp @@ -117,9 +117,9 @@ if (MoveForward) { // When I is being moved forward, we need to make sure the InsertPoint // dominates every users. Or else, a user may be using an undefined I. - for (const Value *User : I.users()) - if (auto *UserInst = dyn_cast(User)) - if (!DT.dominates(&InsertPoint, UserInst)) + for (const Use &U : I.uses()) + if (auto *UserInst = dyn_cast(U.getUser())) + if (UserInst != &InsertPoint && !DT.dominates(&InsertPoint, U)) return false; } else { // When I is being moved backward, we need to make sure all its opernads diff --git a/llvm/test/Transforms/LoopFusion/four_loops.ll b/llvm/test/Transforms/LoopFusion/four_loops.ll --- a/llvm/test/Transforms/LoopFusion/four_loops.ll +++ b/llvm/test/Transforms/LoopFusion/four_loops.ll @@ -9,20 +9,14 @@ ; CHECK-NEXT: bb: ; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]+]] ; CHECK: [[LOOP1HEADER]] -; CHECK: br label %[[LOOP1LATCH:bb[0-9]+]] -; CHECK: [[LOOP1LATCH]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]+]], label %[[LOOP2BODY]] +; CHECK: br label %[[LOOP2BODY:bb[0-9]+]] ; CHECK: [[LOOP2BODY]] -; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]] -; CHECK: [[LOOP2LATCH]] -; CHECK: br i1 %{{.*}}, label %[[LOOP3BODY:bb[0-9]+]], label %[[LOOP3BODY]] +; CHECK: br label %[[LOOP3BODY:bb[0-9]+]] ; CHECK: [[LOOP3BODY]] -; CHECK: br label %[[LOOP3LATCH:bb[0-9]+]] -; CHECK: [[LOOP3LATCH]] -; CHECK: br i1 %{{.*}}, label %[[LOOP4BODY:bb[0-9]+]], label %[[LOOP4BODY]] +; CHECK: br label %[[LOOP4BODY:bb[0-9]+]] ; CHECK: [[LOOP4BODY]] -; CHECK: br label %[[LOOP4LATCH:bb[0-9]+]] -; CHECK: [[LOOP4LATCH]] +; CHECK: br label %[[LOOP1LATCH:bb[0-9]+]] +; CHECK: [[LOOP1LATCH]] ; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[LOOPEXIT:bb[0-9]+]] ; CHECK: ret void define void @dep_free() { diff --git a/llvm/test/Transforms/LoopFusion/guarded.ll b/llvm/test/Transforms/LoopFusion/guarded.ll --- a/llvm/test/Transforms/LoopFusion/guarded.ll +++ b/llvm/test/Transforms/LoopFusion/guarded.ll @@ -8,8 +8,6 @@ ; CHECK: [[LOOP1PREHEADER]] ; CHECK-NEXT: br label %[[LOOP1BODY:bb[0-9]*]] ; CHECK: [[LOOP1BODY]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]*]], label %[[LOOP2BODY]] -; CHECK: [[LOOP2BODY]] ; CHECK: br i1 %{{.*}}, label %[[LOOP1BODY]], label %[[LOOP2EXIT:bb[0-9]+]] ; CHECK: [[LOOP2EXIT]] ; CHECK: br label %[[LOOP1SUCC]] diff --git a/llvm/test/Transforms/LoopFusion/loop_nest.ll b/llvm/test/Transforms/LoopFusion/loop_nest.ll --- a/llvm/test/Transforms/LoopFusion/loop_nest.ll +++ b/llvm/test/Transforms/LoopFusion/loop_nest.ll @@ -25,19 +25,16 @@ ; CHECK: [[LOOP1HEADER]] ; CHECK: br label %[[LOOP3HEADER:bb[0-9]+]] ; CHECK: [[LOOP3HEADER]] -; CHECK: br label %[[LOOP3LATCH:bb[0-9]+]] -; CHECK: [[LOOP3LATCH]] -; CHECK: br i1 %{{.*}}, label %[[LOOP3HEADER]], label %[[LOOP1LATCH:bb[0-9]+]] -; CHECK: [[LOOP1LATCH]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2PREHEADER:bb[0-9]+]], label %[[LOOP2PREHEADER]] -; CHECK: [[LOOP2PREHEADER]] +; CHECK: br label %[[LOOP2HEADER:bb[0-9]+]] +; CHECK: [[LOOP2HEADER]] ; CHECK: br label %[[LOOP4HEADER:bb[0-9]+]] ; CHECK: [[LOOP4HEADER]] -; CHECK: br label %[[LOOP4LATCH:bb[0-9]+]] -; CHECK: [[LOOP4LATCH]] -; CHECK: br i1 %{{.*}}, label %[[LOOP4HEADER]], label %[[LOOP2LATCH:bb[0-9]+]] -; CHECK: [[LOOP2LATCH]] -; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[LOOP1EXIT:bb[0-9]*]] +; CHECK: br i1 %{{.*}}, label %[[LOOP3HEADER]], label %[[LOOP1LATCH:bb[0-9]+]] +; CHECK: [[LOOP1LATCH]] +; CHECK-NEXT: %inc.outer.fc0 = add nuw nsw i64 %indvars.iv105, 1 +; CHECK-NEXT: %add.outer.fc0 = add nuw nsw i32 %.06, 1 +; CHECK-NEXT: %cmp.outer.fc0 = icmp ne i64 %inc.outer.fc0, 100 +; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[LOOP1EXIT:bb[0-9]*]] ; CHECK: ret void ; TODO: The current version of loop fusion does not allow the inner loops to be @@ -48,8 +45,8 @@ br label %bb16 bb16: ; preds = %bb, %bb27 - %.06 = phi i32 [ 0, %bb ], [ %tmp28, %bb27 ] - %indvars.iv105 = phi i64 [ 0, %bb ], [ %indvars.iv.next11, %bb27 ] + %.06 = phi i32 [ 0, %bb ], [ %add.outer.fc0, %bb27 ] + %indvars.iv105 = phi i64 [ 0, %bb ], [ %inc.outer.fc0, %bb27 ] br label %bb18 bb30: ; preds = %bb27 @@ -73,10 +70,10 @@ br i1 %exitcond9, label %bb18, label %bb27 bb27: ; preds = %bb25 - %indvars.iv.next11 = add nuw nsw i64 %indvars.iv105, 1 - %tmp28 = add nuw nsw i32 %.06, 1 - %exitcond12 = icmp ne i64 %indvars.iv.next11, 100 - br i1 %exitcond12, label %bb16, label %bb30 + %inc.outer.fc0 = add nuw nsw i64 %indvars.iv105, 1 + %add.outer.fc0 = add nuw nsw i32 %.06, 1 + %cmp.outer.fc0 = icmp ne i64 %inc.outer.fc0, 100 + br i1 %cmp.outer.fc0, label %bb16, label %bb30 bb33: ; preds = %bb30, %bb45 %.023 = phi i32 [ 0, %bb30 ], [ %tmp46, %bb45 ] diff --git a/llvm/test/Transforms/LoopFusion/simple.ll b/llvm/test/Transforms/LoopFusion/simple.ll --- a/llvm/test/Transforms/LoopFusion/simple.ll +++ b/llvm/test/Transforms/LoopFusion/simple.ll @@ -6,9 +6,7 @@ ; CHECK-NEXT: bb: ; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]*]] ; CHECK: [[LOOP1HEADER]] -; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]] -; CHECK: [[LOOP1LATCH]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]+]], label %[[LOOP2HEADER]] +; CHECK: br label %[[LOOP2HEADER:bb[0-9]*]] ; CHECK: [[LOOP2HEADER]] ; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]] ; CHECK: [[LOOP2LATCH]] @@ -72,9 +70,7 @@ ; CHECK: [[LOOP1PREHEADER]] ; CHECK: br label %[[LOOP1HEADER:bb[0-9]*]] ; CHECK: [[LOOP1HEADER]] -; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]] -; CHECK: [[LOOP1LATCH]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]*]], label %[[LOOP2HEADER]] +; CHECK: br label %[[LOOP2HEADER:bb[0-9]*]] ; CHECK: [[LOOP2HEADER]] ; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]] ; CHECK: [[LOOP2LATCH]] @@ -129,9 +125,7 @@ ; CHECK-NEXT: bb: ; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]*]] ; CHECK: [[LOOP1HEADER]] -; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]] -; CHECK: [[LOOP1LATCH]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]+]], label %[[LOOP2HEADER]] +; CHECK: br label %[[LOOP2HEADER:bb[0-9]*]] ; CHECK: [[LOOP2HEADER]] ; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]] ; CHECK: [[LOOP2LATCH]] @@ -179,8 +173,6 @@ ; CHECK: [[LOOP1PREHEADER]] ; CHECK: br label %[[LOOP1HEADER:bb[0-9]*]] ; CHECK: [[LOOP1HEADER]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]*]], label %[[LOOP2HEADER]] -; CHECK: [[LOOP2HEADER]] ; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[EXITBLOCK]] ; CHECK: ret void define void @raw_only_parametric(i32* noalias %arg, i32 %arg4) { @@ -217,9 +209,7 @@ ; CHECK-NEXT: bb: ; CHECK: br label %[[LOOP1HEADER:bb[0-9]*]] ; CHECK: [[LOOP1HEADER]] -; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]] -; CHECK: [[LOOP1LATCH]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]+]], label %[[LOOP2HEADER]] +; CHECK: br label %[[LOOP2HEADER:bb[0-9]*]] ; CHECK: [[LOOP2HEADER]] ; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]] ; CHECK: [[LOOP2LATCH]]