diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -735,15 +735,16 @@
           }
 
           // Ensure that FC0 and FC1 have identical guards.
-          // If one (or both) are not guarded, this check is not necessary.
-          if (FC0->GuardBranch && FC1->GuardBranch &&
-              !haveIdenticalGuards(*FC0, *FC1)) {
-            LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical "
-                                 "guards. Not Fusing.\n");
-            reportLoopFusion(*FC0, *FC1,
-                             NonIdenticalGuards);
-            continue;
-          }
+          // A guarded loop must never be fused with an unguarded one, so the
+          // guards are compared as soon as either candidate has one.
+          if ((FC0->GuardBranch || FC1->GuardBranch) &&
+              !haveIdenticalGuards(*FC0, *FC1)) {
+            LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical "
+                                 "guards. Not Fusing.\n");
+            reportLoopFusion(*FC0, *FC1,
+                             NonIdenticalGuards);
+            continue;
+          }
 
           // The following three checks look for empty blocks in FC0 and FC1. If
           // any of these blocks are non-empty, we do not fuse. This is done
@@ -1061,8 +1062,15 @@
   /// NonLoopBlock). In other words, the the first successor of both loops must
   /// both go into the loop (i.e., the preheader) or go around the loop (i.e.,
   /// the NonLoopBlock). The same must be true for the second successor.
+  /// If one of the loops has a guard, but the other loop does not, they do not
+  /// have identical guards. Two unguarded loops trivially match.
   bool haveIdenticalGuards(const FusionCandidate &FC0,
                            const FusionCandidate &FC1) const {
+    // Guards can only match when both loops are guarded or both are not. Two
+    // unguarded loops trivially agree; a guarded/unguarded pair never does.
+    if (!FC0.GuardBranch || !FC1.GuardBranch)
+      return !FC0.GuardBranch && !FC1.GuardBranch;
+
     assert(FC0.GuardBranch && FC1.GuardBranch &&
            "Expecting FC0 and FC1 to be guarded loops.");
 
diff --git a/llvm/test/Transforms/LoopFusion/cannot_fuse.ll b/llvm/test/Transforms/LoopFusion/cannot_fuse.ll
--- a/llvm/test/Transforms/LoopFusion/cannot_fuse.ll
+++ b/llvm/test/Transforms/LoopFusion/cannot_fuse.ll
@@ -414,3 +414,35 @@
 bb29:                                             ; preds = %bb18
   ret void
 }
+
+; Check that a guarded loop and a non-guarded loop are not fused.
+; CHECK: Performing Loop Fusion on function _Z3fooPiS_b
+; CHECK: Fusion candidates do not have identical guards. Not Fusing.
+define dso_local void @_Z3fooPiS_b(i32* noalias %A, i32* noalias %B, i1 zeroext %cond) #0 {
+entry:
+  %frombool = zext i1 %cond to i8
+  %tobool = trunc i8 %frombool to i1
+  br i1 %tobool, label %for.body.preheader, label %if.end
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %i.02 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.02
+  store i32 0, i32* %arrayidx, align 4
+  %inc = add nsw i64 %i.02, 1
+  %cmp = icmp slt i64 %inc, 100
+  br i1 %cmp, label %for.body, label %if.end.loopexit
+if.end.loopexit:                                  ; preds = %for.body
+  br label %if.end
+if.end:                                           ; preds = %if.end.loopexit, %entry
+  br label %for.body4
+for.body4:                                        ; preds = %if.end, %for.body4
+  %i1.01 = phi i64 [ 0, %if.end ], [ %inc7, %for.body4 ]
+  %arrayidx5 = getelementptr inbounds i32, i32* %B, i64 %i1.01
+  store i32 0, i32* %arrayidx5, align 4
+  %inc7 = add nsw i64 %i1.01, 1
+  %cmp3 = icmp slt i64 %inc7, 100
+  br i1 %cmp3, label %for.body4, label %for.end8
+for.end8:                                         ; preds = %for.body4
+  ret void
+}