diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp --- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp @@ -895,9 +895,9 @@ continue; } - if (!FC0->GuardBranch && FC1->GuardBranch) { - LLVM_DEBUG(dbgs() << "The second candidate is guarded while the " - "first one is not. Not fusing.\n"); + if (!FC0->GuardBranch != !FC1->GuardBranch) { + LLVM_DEBUG(dbgs() << "One of the candidates is guarded while the " + "other one is not. Not fusing.\n"); reportLoopFusion( *FC0, *FC1, OnlySecondCandidateIsGuarded); continue; diff --git a/llvm/test/Transforms/LoopFusion/guarded.ll b/llvm/test/Transforms/LoopFusion/guarded.ll --- a/llvm/test/Transforms/LoopFusion/guarded.ll +++ b/llvm/test/Transforms/LoopFusion/guarded.ll @@ -1,19 +1,43 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -S -loop-fusion < %s | FileCheck %s @B = common global [1024 x i32] zeroinitializer, align 16 -; CHECK: void @dep_free_parametric -; CHECK-next: entry: -; CHECK: br i1 %{{.*}}, label %[[LOOP1PREHEADER:bb[0-9]*]], label %[[LOOP1SUCC:bb[0-9]+]] -; CHECK: [[LOOP1PREHEADER]] -; CHECK-NEXT: br label %[[LOOP1BODY:bb[0-9]*]] -; CHECK: [[LOOP1BODY]] -; CHECK: br i1 %{{.*}}, label %[[LOOP1BODY]], label %[[LOOP2EXIT:bb[0-9]+]] -; CHECK: [[LOOP2EXIT]] -; CHECK: br label %[[LOOP1SUCC]] -; CHECK: [[LOOP1SUCC]] -; CHECK: ret void define void @dep_free_parametric(i32* noalias %A, i64 %N) { +; CHECK-LABEL: @dep_free_parametric( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP4:%.*]] = icmp slt i64 0, [[N:%.*]] +; CHECK-NEXT: [[CMP31:%.*]] = icmp slt i64 0, [[N]] +; CHECK-NEXT: br i1 [[CMP4]], label [[BB3:%.*]], label [[BB12:%.*]] +; CHECK: bb3: +; CHECK-NEXT: br label [[BB5:%.*]] +; CHECK: bb5: +; CHECK-NEXT: [[I_05:%.*]] = phi i64 [ [[INC:%.*]], [[BB5]] ], [ 0, [[BB3]] ] +; CHECK-NEXT: [[I1_02:%.*]] = phi i64 [ [[INC14:%.*]], [[BB5]] ], [ 0, [[BB3]] ] +; CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[I_05]], 
3 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[I_05]], 3 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[SUB]], [[ADD]] +; CHECK-NEXT: [[REM:%.*]] = srem i64 [[MUL]], [[I_05]] +; CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[REM]] to i32 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[I_05]] +; CHECK-NEXT: store i32 [[CONV]], i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INC]] = add nsw i64 [[I_05]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INC]], [[N]] +; CHECK-NEXT: [[SUB7:%.*]] = sub nsw i64 [[I1_02]], 3 +; CHECK-NEXT: [[ADD8:%.*]] = add nsw i64 [[I1_02]], 3 +; CHECK-NEXT: [[MUL9:%.*]] = mul nsw i64 [[SUB7]], [[ADD8]] +; CHECK-NEXT: [[REM10:%.*]] = srem i64 [[MUL9]], [[I1_02]] +; CHECK-NEXT: [[CONV11:%.*]] = trunc i64 [[REM10]] to i32 +; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 [[I1_02]] +; CHECK-NEXT: store i32 [[CONV11]], i32* [[ARRAYIDX12]], align 4 +; CHECK-NEXT: [[INC14]] = add nsw i64 [[I1_02]], 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp slt i64 [[INC14]], [[N]] +; CHECK-NEXT: br i1 [[CMP3]], label [[BB5]], label [[BB15:%.*]] +; CHECK: bb15: +; CHECK-NEXT: br label [[BB12]] +; CHECK: bb12: +; CHECK-NEXT: ret void +; entry: %cmp4 = icmp slt i64 0, %N br i1 %cmp4, label %bb3, label %bb14 @@ -67,19 +91,31 @@ ; Test that `%add` is moved in for.first.preheader, and the two loops for.first ; and for.second are fused. 
-; CHECK: void @moveinsts_preheader -; CHECK-LABEL: for.first.guard: -; CHECK: br i1 %cmp.guard, label %for.first.preheader, label %for.end -; CHECK-LABEL: for.first.preheader: -; CHECK-NEXT: %add = add nsw i32 %x, 1 -; CHECK-NEXT: br label %for.first -; CHECK-LABEL: for.first: -; CHECK: br i1 %cmp.j, label %for.first, label %for.second.exit -; CHECK-LABEL: for.second.exit: -; CHECK-NEXT: br label %for.end -; CHECK-LABEL: for.end: -; CHECK-NEXT: ret void define void @moveinsts_preheader(i32* noalias %A, i32* noalias %B, i64 %N, i32 %x) { +; CHECK-LABEL: @moveinsts_preheader( +; CHECK-NEXT: for.first.guard: +; CHECK-NEXT: [[CMP_GUARD:%.*]] = icmp slt i64 0, [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP_GUARD]], label [[FOR_FIRST_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.first.preheader: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X:%.*]], 1 +; CHECK-NEXT: br label [[FOR_FIRST:%.*]] +; CHECK: for.first: +; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[INC_I:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ] +; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[INC_J:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ] +; CHECK-NEXT: [[AI:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[I]] +; CHECK-NEXT: store i32 0, i32* [[AI]], align 4 +; CHECK-NEXT: [[INC_I]] = add nsw i64 [[I]], 1 +; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i64 [[INC_I]], [[N]] +; CHECK-NEXT: [[BJ:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[J]] +; CHECK-NEXT: store i32 0, i32* [[BJ]], align 4 +; CHECK-NEXT: [[INC_J]] = add nsw i64 [[J]], 1 +; CHECK-NEXT: [[CMP_J:%.*]] = icmp slt i64 [[INC_J]], [[N]] +; CHECK-NEXT: br i1 [[CMP_J]], label [[FOR_FIRST]], label [[FOR_SECOND_EXIT:%.*]] +; CHECK: for.second.exit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; for.first.guard: %cmp.guard = icmp slt i64 0, %N br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard @@ -123,19 +159,31 @@ ; Test that `%add` is moved in for.second.exit, and the two loops 
for.first ; and for.second are fused. -; CHECK: void @moveinsts_exitblock -; CHECK-LABEL: for.first.guard: -; CHECK: br i1 %cmp.guard, label %for.first.preheader, label %for.end -; CHECK-LABEL: for.first.preheader: -; CHECK-NEXT: br label %for.first -; CHECK-LABEL: for.first: -; CHECK: br i1 %cmp.j, label %for.first, label %for.second.exit -; CHECK-LABEL: for.second.exit: -; CHECK-NEXT: %add = add nsw i32 %x, 1 -; CHECK-NEXT: br label %for.end -; CHECK-LABEL: for.end: -; CHECK-NEXT: ret void define void @moveinsts_exitblock(i32* noalias %A, i32* noalias %B, i64 %N, i32 %x) { +; CHECK-LABEL: @moveinsts_exitblock( +; CHECK-NEXT: for.first.guard: +; CHECK-NEXT: [[CMP_GUARD:%.*]] = icmp slt i64 0, [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP_GUARD]], label [[FOR_FIRST_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.first.preheader: +; CHECK-NEXT: br label [[FOR_FIRST:%.*]] +; CHECK: for.first: +; CHECK-NEXT: [[I_04:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ] +; CHECK-NEXT: [[J_02:%.*]] = phi i64 [ [[INC6:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[I_04]] +; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INC]] = add nsw i64 [[I_04]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INC]], [[N]] +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[J_02]] +; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX4]], align 4 +; CHECK-NEXT: [[INC6]] = add nsw i64 [[J_02]], 1 +; CHECK-NEXT: [[CMP_J:%.*]] = icmp slt i64 [[INC6]], [[N]] +; CHECK-NEXT: br i1 [[CMP_J]], label [[FOR_FIRST]], label [[FOR_SECOND_EXIT:%.*]] +; CHECK: for.second.exit: +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X:%.*]], 1 +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; for.first.guard: %cmp.guard = icmp slt i64 0, %N br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard @@ -179,20 +227,31 @@ 
; Test that `%add` is moved in for.first.guard, and the two loops for.first ; and for.second are fused. -; CHECK: void @moveinsts_guardblock -; CHECK-LABEL: for.first.guard: -; CHECK-NEXT: %cmp.guard = icmp slt i64 0, %N -; CHECK-NEXT: %add = add nsw i32 %x, 1 -; CHECK: br i1 %cmp.guard, label %for.first.preheader, label %for.end -; CHECK-LABEL: for.first.preheader: -; CHECK-NEXT: br label %for.first -; CHECK-LABEL: for.first: -; CHECK: br i1 %cmp.j, label %for.first, label %for.second.exit -; CHECK-LABEL: for.second.exit: -; CHECK-NEXT: br label %for.end -; CHECK-LABEL: for.end: -; CHECK-NEXT: ret void define void @moveinsts_guardblock(i32* noalias %A, i32* noalias %B, i64 %N, i32 %x) { +; CHECK-LABEL: @moveinsts_guardblock( +; CHECK-NEXT: for.first.guard: +; CHECK-NEXT: [[CMP_GUARD:%.*]] = icmp slt i64 0, [[N:%.*]] +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[X:%.*]], 1 +; CHECK-NEXT: br i1 [[CMP_GUARD]], label [[FOR_FIRST_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.first.preheader: +; CHECK-NEXT: br label [[FOR_FIRST:%.*]] +; CHECK: for.first: +; CHECK-NEXT: [[I_04:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ] +; CHECK-NEXT: [[J_02:%.*]] = phi i64 [ [[INC6:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[I_04]] +; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INC]] = add nsw i64 [[I_04]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INC]], [[N]] +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[J_02]] +; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX4]], align 4 +; CHECK-NEXT: [[INC6]] = add nsw i64 [[J_02]], 1 +; CHECK-NEXT: [[CMP_J:%.*]] = icmp slt i64 [[INC6]], [[N]] +; CHECK-NEXT: br i1 [[CMP_J]], label [[FOR_FIRST]], label [[FOR_SECOND_EXIT:%.*]] +; CHECK: for.second.exit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; for.first.guard: 
%cmp.guard = icmp slt i64 0, %N br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard @@ -237,21 +296,31 @@ ; from for.second.guard to for.first.guard, and the two loops for.first and ; for.second are fused. -; CHECK: i64 @updatephi_guardnonloopblock -; CHECK-LABEL: for.first.guard: -; CHECK-NEXT: %cmp.guard = icmp slt i64 0, %N -; CHECK: br i1 %cmp.guard, label %for.first.preheader, label %for.end -; CHECK-LABEL: for.first.preheader: -; CHECK-NEXT: br label %for.first -; CHECK-LABEL: for.first: -; CHECK: br i1 %cmp.j, label %for.first, label %for.second.exit -; CHECK-LABEL: for.second.exit: -; CHECK-NEXT: br label %for.end -; CHECK-LABEL: for.end: -; CHECK-NEXT: %j.lcssa = phi i64 [ 0, %for.first.guard ], [ %j.02, %for.second.exit ] -; CHECK-NEXT: ret i64 %j.lcssa - define i64 @updatephi_guardnonloopblock(i32* noalias %A, i32* noalias %B, i64 %N, i32 %x) { +; CHECK-LABEL: @updatephi_guardnonloopblock( +; CHECK-NEXT: for.first.guard: +; CHECK-NEXT: [[CMP_GUARD:%.*]] = icmp slt i64 0, [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP_GUARD]], label [[FOR_FIRST_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.first.preheader: +; CHECK-NEXT: br label [[FOR_FIRST:%.*]] +; CHECK: for.first: +; CHECK-NEXT: [[I_04:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ] +; CHECK-NEXT: [[J_02:%.*]] = phi i64 [ [[INC6:%.*]], [[FOR_FIRST]] ], [ 0, [[FOR_FIRST_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[I_04]] +; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[INC]] = add nsw i64 [[I_04]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INC]], [[N]] +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[J_02]] +; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX4]], align 4 +; CHECK-NEXT: [[INC6]] = add nsw i64 [[J_02]], 1 +; CHECK-NEXT: [[CMP_J:%.*]] = icmp slt i64 [[INC6]], [[N]] +; CHECK-NEXT: br i1 [[CMP_J]], label [[FOR_FIRST]], label 
[[FOR_SECOND_EXIT:%.*]] +; CHECK: for.second.exit: +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: [[J_LCSSA:%.*]] = phi i64 [ 0, [[FOR_FIRST_GUARD:%.*]] ], [ [[J_02]], [[FOR_SECOND_EXIT]] ] +; CHECK-NEXT: ret i64 [[J_LCSSA]] +; for.first.guard: %cmp.guard = icmp slt i64 0, %N br i1 %cmp.guard, label %for.first.preheader, label %for.second.guard @@ -291,3 +360,33 @@ %j.lcssa = phi i64 [ 0, %for.second.guard ], [ %j.02, %for.second.exit ] ret i64 %j.lcssa } + +define void @pr59024() { +; CHECK-LABEL: @pr59024( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[FOR_2_PREHEADER:%.*]], label [[FOR_1_PREHEADER:%.*]] +; CHECK: for.1.preheader: +; CHECK-NEXT: br label [[FOR_1:%.*]] +; CHECK: for.1: +; CHECK-NEXT: br i1 true, label [[FOR_2_PREHEADER_LOOPEXIT:%.*]], label [[FOR_1]] +; CHECK: for.2.preheader.loopexit: +; CHECK-NEXT: br label [[FOR_2_PREHEADER]] +; CHECK: for.2.preheader: +; CHECK-NEXT: br label [[FOR_2:%.*]] +; CHECK: for.2: +; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[FOR_2]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br i1 false, label %for.2, label %for.1 + +for.1: ; preds = %for.1, %entry + br i1 true, label %for.2, label %for.1 + +for.2: ; preds = %for.2, %for.1, %entry + br i1 true, label %exit, label %for.2 + +exit: ; preds = %for.2 + ret void +}