diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp --- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp @@ -91,6 +91,7 @@ STATISTIC(NonIdenticalGuards, "Candidates have different guards"); STATISTIC(NonEmptyExitBlock, "Candidate has a non-empty exit block"); STATISTIC(NonEmptyGuardBlock, "Candidate has a non-empty guard block"); +STATISTIC(NotRotated, "Candidate is not rotated"); enum FusionDependenceAnalysisChoice { FUSION_DEPENDENCE_ANALYSIS_SCEV, @@ -319,6 +320,11 @@ return reportInvalidCandidate(NotSimplifiedForm); } + if (!isRotated()) { + LLVM_DEBUG(dbgs() << "Loop " << L->getName() << " is not rotated!\n"); + return reportInvalidCandidate(NotRotated); + } + return true; } diff --git a/llvm/test/Transforms/LoopFusion/cannot_fuse.ll b/llvm/test/Transforms/LoopFusion/cannot_fuse.ll --- a/llvm/test/Transforms/LoopFusion/cannot_fuse.ll +++ b/llvm/test/Transforms/LoopFusion/cannot_fuse.ll @@ -15,60 +15,64 @@ ; CHECK: bb20.preheader ; CHECK: **************************** ; CHECK: Loop Fusion complete -define void @non_cfe(i32* noalias %arg) { +define void @non_cfe(i32* noalias %arg, i32 %N) { bb: - br label %bb5 - -bb5: ; preds = %bb14, %bb - %indvars.iv2 = phi i64 [ %indvars.iv.next3, %bb14 ], [ 0, %bb ] - %.01 = phi i32 [ 0, %bb ], [ %tmp15, %bb14 ] - %exitcond4 = icmp ne i64 %indvars.iv2, 100 - br i1 %exitcond4, label %bb7, label %bb16 + br label %bb7 -bb7: ; preds = %bb5 - %tmp = add nsw i32 %.01, -3 - %tmp8 = add nuw nsw i64 %indvars.iv2, 3 +bb7: ; preds = %bb, %bb14 + %.014 = phi i32 [ 0, %bb ], [ %tmp15, %bb14 ] + %indvars.iv23 = phi i64 [ 0, %bb ], [ %indvars.iv.next3, %bb14 ] + %tmp = add nsw i32 %.014, -3 + %tmp8 = add nuw nsw i64 %indvars.iv23, 3 %tmp9 = trunc i64 %tmp8 to i32 %tmp10 = mul nsw i32 %tmp, %tmp9 - %tmp11 = trunc i64 %indvars.iv2 to i32 + %tmp11 = trunc i64 %indvars.iv23 to i32 %tmp12 = srem i32 %tmp10, %tmp11 - %tmp13 = getelementptr inbounds i32, i32* %arg, 
i64 %indvars.iv2 + %tmp13 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv23 store i32 %tmp12, i32* %tmp13, align 4 br label %bb14 bb14: ; preds = %bb7 - %indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1 - %tmp15 = add nuw nsw i32 %.01, 1 - br label %bb5 + %indvars.iv.next3 = add nuw nsw i64 %indvars.iv23, 1 + %tmp15 = add nuw nsw i32 %.014, 1 + %exitcond4 = icmp ne i64 %indvars.iv.next3, 100 + br i1 %exitcond4, label %bb7, label %bb34 -bb16: ; preds = %bb5 +bb34: + %cmp = icmp slt i32 %N, 50 + br i1 %cmp, label %bb16, label %bb33 + +bb16: ; preds = %bb34 %tmp17 = load i32, i32* %arg, align 4 %tmp18 = icmp slt i32 %tmp17, 0 - br i1 %tmp18, label %bb20, label %bb33 + br i1 %tmp18, label %bb20.preheader, label %bb33 -bb20: ; preds = %bb30, %bb16 - %indvars.iv = phi i64 [ %indvars.iv.next, %bb30 ], [ 0, %bb16 ] - %.0 = phi i32 [ 0, %bb16 ], [ %tmp31, %bb30 ] - %exitcond = icmp ne i64 %indvars.iv, 100 - br i1 %exitcond, label %bb22, label %bb33 +bb20.preheader: ; preds = %bb16 + br label %bb22 -bb22: ; preds = %bb20 - %tmp23 = add nsw i32 %.0, -3 - %tmp24 = add nuw nsw i64 %indvars.iv, 3 +bb22: ; preds = %bb20.preheader, %bb30 + %.02 = phi i32 [ 0, %bb20.preheader ], [ %tmp31, %bb30 ] + %indvars.iv1 = phi i64 [ 0, %bb20.preheader ], [ %indvars.iv.next, %bb30 ] + %tmp23 = add nsw i32 %.02, -3 + %tmp24 = add nuw nsw i64 %indvars.iv1, 3 %tmp25 = trunc i64 %tmp24 to i32 %tmp26 = mul nsw i32 %tmp23, %tmp25 - %tmp27 = trunc i64 %indvars.iv to i32 + %tmp27 = trunc i64 %indvars.iv1 to i32 %tmp28 = srem i32 %tmp26, %tmp27 - %tmp29 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv + %tmp29 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv1 store i32 %tmp28, i32* %tmp29, align 4 br label %bb30 bb30: ; preds = %bb22 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %tmp31 = add nuw nsw i32 %.0, 1 - br label %bb20 + %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1 + %tmp31 = add nuw nsw i32 %.02, 1 + 
%exitcond = icmp ne i64 %indvars.iv.next, 100 + br i1 %exitcond, label %bb22, label %bb33.loopexit -bb33: ; preds = %bb20, %bb16 +bb33.loopexit: ; preds = %bb30 + br label %bb33 + +bb33: ; preds = %bb33.loopexit, %bb16 ret void } @@ -88,54 +92,48 @@ ; CHECK: Loop Fusion complete define void @non_adjacent(i32* noalias %arg) { bb: - br label %bb3 - -bb3: ; preds = %bb11, %bb - %.01 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ] - %exitcond2 = icmp ne i64 %.01, 100 - br i1 %exitcond2, label %bb5, label %bb4 + br label %bb5 -bb4: ; preds = %bb3 +bb4: ; preds = %bb11 br label %bb13 -bb5: ; preds = %bb3 - %tmp = add nsw i64 %.01, -3 - %tmp6 = add nuw nsw i64 %.01, 3 +bb5: ; preds = %bb, %bb11 + %.013 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ] + %tmp = add nsw i64 %.013, -3 + %tmp6 = add nuw nsw i64 %.013, 3 %tmp7 = mul nsw i64 %tmp, %tmp6 - %tmp8 = srem i64 %tmp7, %.01 + %tmp8 = srem i64 %tmp7, %.013 %tmp9 = trunc i64 %tmp8 to i32 - %tmp10 = getelementptr inbounds i32, i32* %arg, i64 %.01 + %tmp10 = getelementptr inbounds i32, i32* %arg, i64 %.013 store i32 %tmp9, i32* %tmp10, align 4 br label %bb11 bb11: ; preds = %bb5 - %tmp12 = add nuw nsw i64 %.01, 1 - br label %bb3 + %tmp12 = add nuw nsw i64 %.013, 1 + %exitcond2 = icmp ne i64 %tmp12, 100 + br i1 %exitcond2, label %bb5, label %bb4 bb13: ; preds = %bb4 - br label %bb14 + br label %bb16 -bb14: ; preds = %bb23, %bb13 - %.0 = phi i64 [ 0, %bb13 ], [ %tmp24, %bb23 ] - %exitcond = icmp ne i64 %.0, 100 - br i1 %exitcond, label %bb16, label %bb15 - -bb15: ; preds = %bb14 +bb15: ; preds = %bb23 br label %bb25 -bb16: ; preds = %bb14 - %tmp17 = add nsw i64 %.0, -3 - %tmp18 = add nuw nsw i64 %.0, 3 +bb16: ; preds = %bb13, %bb23 + %.02 = phi i64 [ 0, %bb13 ], [ %tmp24, %bb23 ] + %tmp17 = add nsw i64 %.02, -3 + %tmp18 = add nuw nsw i64 %.02, 3 %tmp19 = mul nsw i64 %tmp17, %tmp18 - %tmp20 = srem i64 %tmp19, %.0 + %tmp20 = srem i64 %tmp19, %.02 %tmp21 = trunc i64 %tmp20 to i32 - %tmp22 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, 
i64 0, i64 %.0 + %tmp22 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %.02 store i32 %tmp21, i32* %tmp22, align 4 br label %bb23 bb23: ; preds = %bb16 - %tmp24 = add nuw nsw i64 %.0, 1 - br label %bb14 + %tmp24 = add nuw nsw i64 %.02, 1 + %exitcond = icmp ne i64 %tmp24, 100 + br i1 %exitcond, label %bb16, label %bb15 bb25: ; preds = %bb15 ret void @@ -156,54 +154,48 @@ ; CHECK: Loop Fusion complete define void @different_bounds(i32* noalias %arg) { bb: - br label %bb3 - -bb3: ; preds = %bb11, %bb - %.01 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ] - %exitcond2 = icmp ne i64 %.01, 100 - br i1 %exitcond2, label %bb5, label %bb4 + br label %bb5 -bb4: ; preds = %bb3 +bb4: ; preds = %bb11 br label %bb13 -bb5: ; preds = %bb3 - %tmp = add nsw i64 %.01, -3 - %tmp6 = add nuw nsw i64 %.01, 3 +bb5: ; preds = %bb, %bb11 + %.013 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ] + %tmp = add nsw i64 %.013, -3 + %tmp6 = add nuw nsw i64 %.013, 3 %tmp7 = mul nsw i64 %tmp, %tmp6 - %tmp8 = srem i64 %tmp7, %.01 + %tmp8 = srem i64 %tmp7, %.013 %tmp9 = trunc i64 %tmp8 to i32 - %tmp10 = getelementptr inbounds i32, i32* %arg, i64 %.01 + %tmp10 = getelementptr inbounds i32, i32* %arg, i64 %.013 store i32 %tmp9, i32* %tmp10, align 4 br label %bb11 bb11: ; preds = %bb5 - %tmp12 = add nuw nsw i64 %.01, 1 - br label %bb3 + %tmp12 = add nuw nsw i64 %.013, 1 + %exitcond2 = icmp ne i64 %tmp12, 100 + br i1 %exitcond2, label %bb5, label %bb4 bb13: ; preds = %bb4 - br label %bb14 + br label %bb16 -bb14: ; preds = %bb23, %bb13 - %.0 = phi i64 [ 0, %bb13 ], [ %tmp24, %bb23 ] - %exitcond = icmp ne i64 %.0, 200 - br i1 %exitcond, label %bb16, label %bb15 - -bb15: ; preds = %bb14 +bb15: ; preds = %bb23 br label %bb25 -bb16: ; preds = %bb14 - %tmp17 = add nsw i64 %.0, -3 - %tmp18 = add nuw nsw i64 %.0, 3 +bb16: ; preds = %bb13, %bb23 + %.02 = phi i64 [ 0, %bb13 ], [ %tmp24, %bb23 ] + %tmp17 = add nsw i64 %.02, -3 + %tmp18 = add nuw nsw i64 %.02, 3 %tmp19 = mul nsw i64 %tmp17, %tmp18 - %tmp20 = srem 
i64 %tmp19, %.0 + %tmp20 = srem i64 %tmp19, %.02 %tmp21 = trunc i64 %tmp20 to i32 - %tmp22 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %.0 + %tmp22 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %.02 store i32 %tmp21, i32* %tmp22, align 4 br label %bb23 bb23: ; preds = %bb16 - %tmp24 = add nuw nsw i64 %.0, 1 - br label %bb14 + %tmp24 = add nuw nsw i64 %.02, 1 + %exitcond = icmp ne i64 %tmp24, 200 + br i1 %exitcond, label %bb16, label %bb15 bb25: ; preds = %bb15 ret void @@ -225,41 +217,38 @@ ; CHECK: Loop Fusion complete define void @negative_dependence(i32* noalias %arg) { bb: - br label %bb5 + br label %bb7 -bb5: ; preds = %bb9, %bb - %indvars.iv2 = phi i64 [ %indvars.iv.next3, %bb9 ], [ 0, %bb ] - %exitcond4 = icmp ne i64 %indvars.iv2, 100 - br i1 %exitcond4, label %bb7, label %bb11 +bb11.preheader: ; preds = %bb9 + br label %bb13 -bb7: ; preds = %bb5 - %tmp = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv2 - %tmp8 = trunc i64 %indvars.iv2 to i32 +bb7: ; preds = %bb, %bb9 + %indvars.iv22 = phi i64 [ 0, %bb ], [ %indvars.iv.next3, %bb9 ] + %tmp = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv22 + %tmp8 = trunc i64 %indvars.iv22 to i32 store i32 %tmp8, i32* %tmp, align 4 br label %bb9 bb9: ; preds = %bb7 - %indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1 - br label %bb5 + %indvars.iv.next3 = add nuw nsw i64 %indvars.iv22, 1 + %exitcond4 = icmp ne i64 %indvars.iv.next3, 100 + br i1 %exitcond4, label %bb7, label %bb11.preheader -bb11: ; preds = %bb18, %bb5 - %indvars.iv = phi i64 [ %indvars.iv.next, %bb18 ], [ 0, %bb5 ] - %exitcond = icmp ne i64 %indvars.iv, 100 - br i1 %exitcond, label %bb13, label %bb19 - -bb13: ; preds = %bb11 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 +bb13: ; preds = %bb11.preheader, %bb18 + %indvars.iv1 = phi i64 [ 0, %bb11.preheader ], [ %indvars.iv.next, %bb18 ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1 %tmp14 = getelementptr inbounds i32, i32* %arg, i64 
%indvars.iv.next %tmp15 = load i32, i32* %tmp14, align 4 %tmp16 = shl nsw i32 %tmp15, 1 - %tmp17 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv + %tmp17 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv1 store i32 %tmp16, i32* %tmp17, align 4 br label %bb18 bb18: ; preds = %bb13 - br label %bb11 + %exitcond = icmp ne i64 %indvars.iv.next, 100 + br i1 %exitcond, label %bb13, label %bb19 -bb19: ; preds = %bb11 +bb19: ; preds = %bb18 ret void } @@ -278,45 +267,42 @@ ; CHECK: Attempting fusion on Candidate Set: ; CHECK-NEXT: [[LOOP1PREHEADER]] ; CHECK-NEXT: [[LOOP2PREHEADER]] -; CHECK: Memory dependencies do not allow fusion! +; CHECK: Fusion candidate does not have empty preheader. Not fusing. ; CHECK: Loop Fusion complete define i32 @sumTest(i32* noalias %arg) { bb: - br label %bb6 + br label %bb9 -bb6: ; preds = %bb9, %bb - %indvars.iv3 = phi i64 [ %indvars.iv.next4, %bb9 ], [ 0, %bb ] - %.01 = phi i32 [ 0, %bb ], [ %tmp11, %bb9 ] - %exitcond5 = icmp ne i64 %indvars.iv3, 100 - br i1 %exitcond5, label %bb9, label %bb13 +bb13.preheader: ; preds = %bb9 + %.01.lcssa = phi i32 [ %tmp11, %bb9 ] + br label %bb15 -bb9: ; preds = %bb6 - %tmp = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv3 +bb9: ; preds = %bb, %bb9 + %.013 = phi i32 [ 0, %bb ], [ %tmp11, %bb9 ] + %indvars.iv32 = phi i64 [ 0, %bb ], [ %indvars.iv.next4, %bb9 ] + %tmp = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv32 %tmp10 = load i32, i32* %tmp, align 4 - %tmp11 = add nsw i32 %.01, %tmp10 - %indvars.iv.next4 = add nuw nsw i64 %indvars.iv3, 1 - br label %bb6 - -bb13: ; preds = %bb20, %bb6 - %.01.lcssa = phi i32 [ %.01, %bb6 ], [ %.01.lcssa, %bb20 ] - %indvars.iv = phi i64 [ %indvars.iv.next, %bb20 ], [ 0, %bb6 ] - %exitcond = icmp ne i64 %indvars.iv, 100 - br i1 %exitcond, label %bb15, label %bb14 + %tmp11 = add nsw i32 %.013, %tmp10 + %indvars.iv.next4 = add nuw nsw i64 %indvars.iv32, 1 + %exitcond5 = icmp ne i64 
%indvars.iv.next4, 100 + br i1 %exitcond5, label %bb9, label %bb13.preheader -bb14: ; preds = %bb13 +bb14: ; preds = %bb20 br label %bb21 -bb15: ; preds = %bb13 - %tmp16 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv +bb15: ; preds = %bb13.preheader, %bb20 + %indvars.iv1 = phi i64 [ 0, %bb13.preheader ], [ %indvars.iv.next, %bb20 ] + %tmp16 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv1 %tmp17 = load i32, i32* %tmp16, align 4 %tmp18 = sdiv i32 %tmp17, %.01.lcssa - %tmp19 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv + %tmp19 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv1 store i32 %tmp18, i32* %tmp19, align 4 br label %bb20 bb20: ; preds = %bb15 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - br label %bb13 + %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 100 + br i1 %exitcond, label %bb15, label %bb14 bb21: ; preds = %bb14 ret i32 %.01.lcssa diff --git a/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll b/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll --- a/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll +++ b/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll @@ -1,301 +1,271 @@ ; RUN: opt -S -loop-fusion -pass-remarks-missed=loop-fusion -disable-output < %s 2>&1 | FileCheck %s -; + +; ModuleID = '' +source_filename = "" target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" @B = common global [1024 x i32] zeroinitializer, align 16, !dbg !0 ; CHECK: remark: diagnostics_missed.c:18:3: [non_adjacent]: entry and for.end: Loops are not adjacent -define void @non_adjacent(i32* noalias %A) !dbg !67 { +define void @non_adjacent(i32* noalias %A) !dbg !14 { entry: - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ] - %exitcond1 = icmp ne i64 %i.0, 100 - br i1 %exitcond1, label %for.body, label %for.cond.cleanup + br label %for.body 
-for.cond.cleanup: ; preds = %for.cond +for.cond.cleanup: ; preds = %for.inc br label %for.end -for.body: ; preds = %for.cond - %sub = add nsw i64 %i.0, -3 - %add = add nuw nsw i64 %i.0, 3 +for.body: ; preds = %entry, %for.inc + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.inc ] + %sub = add nsw i64 %i.02, -3 + %add = add nuw nsw i64 %i.02, 3 %mul = mul nsw i64 %sub, %add - %rem = srem i64 %mul, %i.0 + %rem = srem i64 %mul, %i.02 %conv = trunc i64 %rem to i32 - %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.0 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.02 store i32 %conv, i32* %arrayidx, align 4 br label %for.inc for.inc: ; preds = %for.body - %inc = add nuw nsw i64 %i.0, 1, !dbg !86 - br label %for.cond, !dbg !87, !llvm.loop !88 + %inc = add nuw nsw i64 %i.02, 1, !dbg !26 + %exitcond1 = icmp ne i64 %inc, 100 + br i1 %exitcond1, label %for.body, label %for.cond.cleanup, !llvm.loop !28 for.end: ; preds = %for.cond.cleanup - br label %for.cond2 + br label %for.body6 -for.cond2: ; preds = %for.inc13, %for.end - %i1.0 = phi i64 [ 0, %for.end ], [ %inc14, %for.inc13 ] - %exitcond = icmp ne i64 %i1.0, 100 - br i1 %exitcond, label %for.body6, label %for.cond.cleanup5 - -for.cond.cleanup5: ; preds = %for.cond2 +for.cond.cleanup5: ; preds = %for.inc13 br label %for.end15 -for.body6: ; preds = %for.cond2 - %sub7 = add nsw i64 %i1.0, -3 - %add8 = add nuw nsw i64 %i1.0, 3 +for.body6: ; preds = %for.end, %for.inc13 + %i1.01 = phi i64 [ 0, %for.end ], [ %inc14, %for.inc13 ] + %sub7 = add nsw i64 %i1.01, -3 + %add8 = add nuw nsw i64 %i1.01, 3 %mul9 = mul nsw i64 %sub7, %add8 - %rem10 = srem i64 %mul9, %i1.0 + %rem10 = srem i64 %mul9, %i1.01 %conv11 = trunc i64 %rem10 to i32 - %arrayidx12 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %i1.0 + %arrayidx12 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %i1.01 store i32 %conv11, i32* %arrayidx12, align 4 br label %for.inc13 for.inc13: ; preds = %for.body6 - %inc14 = add 
nuw nsw i64 %i1.0, 1, !dbg !100 - br label %for.cond2, !dbg !101, !llvm.loop !102 + %inc14 = add nuw nsw i64 %i1.01, 1, !dbg !31 + %exitcond = icmp ne i64 %inc14, 100 + br i1 %exitcond, label %for.body6, label %for.cond.cleanup5, !llvm.loop !33 for.end15: ; preds = %for.cond.cleanup5 ret void } - ; CHECK: remark: diagnostics_missed.c:28:3: [different_bounds]: entry and for.end: Loop trip counts are not the same -define void @different_bounds(i32* noalias %A) !dbg !105 { +define void @different_bounds(i32* noalias %A) !dbg !36 { entry: - br label %for.cond + br label %for.body -for.cond: ; preds = %for.inc, %entry - %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ] - %exitcond1 = icmp ne i64 %i.0, 100 - br i1 %exitcond1, label %for.body, label %for.cond.cleanup - -for.cond.cleanup: ; preds = %for.cond +for.cond.cleanup: ; preds = %for.inc br label %for.end -for.body: ; preds = %for.cond - %sub = add nsw i64 %i.0, -3 - %add = add nuw nsw i64 %i.0, 3 +for.body: ; preds = %entry, %for.inc + %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.inc ] + %sub = add nsw i64 %i.02, -3 + %add = add nuw nsw i64 %i.02, 3 %mul = mul nsw i64 %sub, %add - %rem = srem i64 %mul, %i.0 + %rem = srem i64 %mul, %i.02 %conv = trunc i64 %rem to i32 - %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.0 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.02 store i32 %conv, i32* %arrayidx, align 4 br label %for.inc for.inc: ; preds = %for.body - %inc = add nuw nsw i64 %i.0, 1, !dbg !123 - br label %for.cond, !dbg !124, !llvm.loop !125 + %inc = add nuw nsw i64 %i.02, 1, !dbg !43 + %exitcond1 = icmp ne i64 %inc, 100 + br i1 %exitcond1, label %for.body, label %for.cond.cleanup, !llvm.loop !45 for.end: ; preds = %for.cond.cleanup - br label %for.cond2 - -for.cond2: ; preds = %for.inc13, %for.end - %i1.0 = phi i64 [ 0, %for.end ], [ %inc14, %for.inc13 ] - %exitcond = icmp ne i64 %i1.0, 200 - br i1 %exitcond, label %for.body6, label %for.cond.cleanup5 + br label %for.body6 -for.cond.cleanup5: ; 
preds = %for.cond2 +for.cond.cleanup5: ; preds = %for.inc13 br label %for.end15 -for.body6: ; preds = %for.cond2 - %sub7 = add nsw i64 %i1.0, -3 - %add8 = add nuw nsw i64 %i1.0, 3 +for.body6: ; preds = %for.end, %for.inc13 + %i1.01 = phi i64 [ 0, %for.end ], [ %inc14, %for.inc13 ] + %sub7 = add nsw i64 %i1.01, -3 + %add8 = add nuw nsw i64 %i1.01, 3 %mul9 = mul nsw i64 %sub7, %add8 - %rem10 = srem i64 %mul9, %i1.0 + %rem10 = srem i64 %mul9, %i1.01 %conv11 = trunc i64 %rem10 to i32 - %arrayidx12 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %i1.0 + %arrayidx12 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %i1.01 store i32 %conv11, i32* %arrayidx12, align 4 br label %for.inc13 for.inc13: ; preds = %for.body6 - %inc14 = add nuw nsw i64 %i1.0, 1 - br label %for.cond2, !dbg !138, !llvm.loop !139 + %inc14 = add nuw nsw i64 %i1.01, 1 + %exitcond = icmp ne i64 %inc14, 200 + br i1 %exitcond, label %for.body6, label %for.cond.cleanup5, !llvm.loop !48 for.end15: ; preds = %for.cond.cleanup5 ret void } ; CHECK: remark: diagnostics_missed.c:38:3: [negative_dependence]: entry and for.end: Loop has a non-empty preheader -define void @negative_dependence(i32* noalias %A) !dbg !142 { +define void @negative_dependence(i32* noalias %A) !dbg !51 { entry: - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc ], [ 0, %entry ] - %exitcond3 = icmp ne i64 %indvars.iv1, 100 - br i1 %exitcond3, label %for.body, label %for.end + br label %for.body -for.body: ; preds = %for.cond - %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1 - %tmp = trunc i64 %indvars.iv1 to i32 +for.body: ; preds = %entry, %for.inc + %indvars.iv13 = phi i64 [ 0, %entry ], [ %indvars.iv.next2, %for.inc ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv13 + %tmp = trunc i64 %indvars.iv13 to i32 store i32 %tmp, i32* %arrayidx, align 4 br label %for.inc for.inc: ; preds = %for.body - 
%indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1 - br label %for.cond, !dbg !160, !llvm.loop !161 + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv13, 1 + %exitcond3 = icmp ne i64 %indvars.iv.next2, 100 + br i1 %exitcond3, label %for.body, label %for.end, !llvm.loop !58 -for.end: ; preds = %for.cond - call void @llvm.dbg.value(metadata i32 0, metadata !147, metadata !DIExpression()), !dbg !163 - br label %for.cond2, !dbg !164 +for.end: ; preds = %for.inc + call void @llvm.dbg.value(metadata i32 0, metadata !56, metadata !DIExpression()), !dbg !61 + br label %for.body5 -for.cond2: ; preds = %for.inc10, %for.end - %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc10 ], [ 0, %for.end ] - %exitcond = icmp ne i64 %indvars.iv, 100 - br i1 %exitcond, label %for.body5, label %for.end12 - -for.body5: ; preds = %for.cond2 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 +for.body5: ; preds = %for.end, %for.inc10 + %indvars.iv2 = phi i64 [ 0, %for.end ], [ %indvars.iv.next, %for.inc10 ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv2, 1 %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next %tmp4 = load i32, i32* %arrayidx7, align 4 %mul = shl nsw i32 %tmp4, 1 - %arrayidx9 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv + %arrayidx9 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv2 store i32 %mul, i32* %arrayidx9, align 4 br label %for.inc10 for.inc10: ; preds = %for.body5 - br label %for.cond2 + %exitcond = icmp ne i64 %indvars.iv.next, 100 + br i1 %exitcond, label %for.body5, label %for.end12 -for.end12: ; preds = %for.cond. 
- ret void, !dbg !178 +for.end12: ; preds = %for.inc10 + ret void, !dbg !62 } ; CHECK: remark: diagnostics_missed.c:51:3: [sumTest]: entry and for.cond2.preheader: Dependencies prevent fusion -define i32 @sumTest(i32* noalias %A) !dbg !179 { +define i32 @sumTest(i32* noalias %A) !dbg !63 { entry: - br label %for.cond + br label %for.body -for.cond: ; preds = %for.inc, %entry - %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc ], [ 0, %entry ] - %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ] - %exitcond3 = icmp ne i64 %indvars.iv1, 100 - br i1 %exitcond3, label %for.body, label %for.cond2 +for.cond2.preheader: ; preds = %for.inc + br label %for.body5 -for.body: ; preds = %for.cond +for.body: ; preds = %entry, %for.inc + %sum.04 = phi i32 [ 0, %entry ], [ %add, %for.inc ] + %indvars.iv13 = phi i64 [ 0, %entry ], [ %indvars.iv.next2, %for.inc ] br label %for.inc for.inc: ; preds = %for.body - %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv13 %tmp = load i32, i32* %arrayidx, align 4 - %add = add nsw i32 %sum.0, %tmp - %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1 - br label %for.cond, !dbg !199, !llvm.loop !200 - -for.cond2: ; preds = %for.inc10, %for.cond - %sum.0.lcssa = phi i32 [ %sum.0, %for.cond ], [ %sum.0.lcssa, %for.inc10 ] - %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc10 ], [ 0, %for.cond ] - %exitcond = icmp ne i64 %indvars.iv, 100 - br i1 %exitcond, label %for.body5, label %for.end12 - -for.body5: ; preds = %for.cond2 - %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %add = add nsw i32 %sum.04, %tmp + %indvars.iv.next2 = add nuw nsw i64 %indvars.iv13, 1 + %exitcond3 = icmp ne i64 %indvars.iv.next2, 100 + br i1 %exitcond3, label %for.body, label %for.cond2.preheader, !llvm.loop !73 + +for.body5: ; preds = %for.cond2.preheader, %for.inc10 + %indvars.iv2 = phi i64 [ 0, %for.cond2.preheader ], [ %indvars.iv.next, %for.inc10 ] + %arrayidx7 = 
getelementptr inbounds i32, i32* %A, i64 %indvars.iv2 %tmp4 = load i32, i32* %arrayidx7, align 4 - %div = sdiv i32 %tmp4, %sum.0.lcssa - %arrayidx9 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv + %div = sdiv i32 %tmp4, %add + %arrayidx9 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv2 store i32 %div, i32* %arrayidx9, align 4 br label %for.inc10 for.inc10: ; preds = %for.body5 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - br label %for.cond2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv2, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 100 + br i1 %exitcond, label %for.body5, label %for.end12 -for.end12: ; preds = %for.cond2 - ret i32 %sum.0.lcssa, !dbg !215 +for.end12: ; preds = %for.inc10 + ret i32 %add, !dbg !76 } -declare void @llvm.dbg.value(metadata, metadata, metadata) +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.value(metadata, metadata, metadata) #0 +attributes #0 = { nounwind readnone speculatable willreturn } !llvm.dbg.cu = !{!2} -!llvm.module.flags = !{!11, !12, !13, !14} +!llvm.module.flags = !{!10, !11, !12, !13} !0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) -!1 = distinct !DIGlobalVariable(name: "B", scope: !2, file: !6, line: 46, type: !7, isLocal: false, isDefinition: true) +!1 = distinct !DIGlobalVariable(name: "B", scope: !2, file: !3, line: 46, type: !6, isLocal: false, isDefinition: true) !2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 9.0.0 (git@github.ibm.com:compiler/llvm-project.git 23c4baaa9f5b33d2d52eda981d376c6b0a7a3180)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, nameTableKind: GNU) !3 = !DIFile(filename: "diagnostics_missed.c", directory: "/tmp") !4 = !{} !5 = !{!0} -!6 = !DIFile(filename: "diagnostics_missed.c", directory: "/tmp") -!7 = !DICompositeType(tag: DW_TAG_array_type, baseType: !8, size: 32768, elements: !9) -!8 
= !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -!9 = !{!10} -!10 = !DISubrange(count: 1024) -!11 = !{i32 2, !"Dwarf Version", i32 4} -!12 = !{i32 2, !"Debug Info Version", i32 3} -!13 = !{i32 1, !"wchar_size", i32 4} -!14 = !{i32 7, !"PIC Level", i32 2} -!17 = !DISubroutineType(types: !18) -!18 = !{null, !19} -!19 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !20) -!20 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64) -!67 = distinct !DISubprogram(name: "non_adjacent", scope: !6, file: !6, line: 17, type: !17, scopeLine: 17, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !68) -!68 = !{!69, !70, !73} -!69 = !DILocalVariable(name: "A", arg: 1, scope: !67, file: !6, line: 17, type: !19) -!70 = !DILocalVariable(name: "i", scope: !71, file: !6, line: 18, type: !72) -!71 = distinct !DILexicalBlock(scope: !67, file: !6, line: 18, column: 3) -!72 = !DIBasicType(name: "long int", size: 64, encoding: DW_ATE_signed) -!73 = !DILocalVariable(name: "i", scope: !74, file: !6, line: 22, type: !72) -!74 = distinct !DILexicalBlock(scope: !67, file: !6, line: 22, column: 3) -!79 = distinct !DILexicalBlock(scope: !71, file: !6, line: 18, column: 3) -!80 = !DILocation(line: 18, column: 3, scope: !71) -!86 = !DILocation(line: 18, column: 30, scope: !79) -!87 = !DILocation(line: 18, column: 3, scope: !79) -!88 = distinct !{!88, !80, !89} -!89 = !DILocation(line: 20, column: 3, scope: !71) -!93 = distinct !DILexicalBlock(scope: !74, file: !6, line: 22, column: 3) -!94 = !DILocation(line: 22, column: 3, scope: !74) -!100 = !DILocation(line: 22, column: 30, scope: !93) -!101 = !DILocation(line: 22, column: 3, scope: !93) -!102 = distinct !{!102, !94, !103} -!103 = !DILocation(line: 24, column: 3, scope: !74) -!105 = distinct !DISubprogram(name: "different_bounds", scope: !6, file: !6, line: 27, type: !17, scopeLine: 27, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, 
spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !106) -!106 = !{!107, !108, !110} -!107 = !DILocalVariable(name: "A", arg: 1, scope: !105, file: !6, line: 27, type: !19) -!108 = !DILocalVariable(name: "i", scope: !109, file: !6, line: 28, type: !72) -!109 = distinct !DILexicalBlock(scope: !105, file: !6, line: 28, column: 3) -!110 = !DILocalVariable(name: "i", scope: !111, file: !6, line: 32, type: !72) -!111 = distinct !DILexicalBlock(scope: !105, file: !6, line: 32, column: 3) -!116 = distinct !DILexicalBlock(scope: !109, file: !6, line: 28, column: 3) -!117 = !DILocation(line: 28, column: 3, scope: !109) -!123 = !DILocation(line: 28, column: 30, scope: !116) -!124 = !DILocation(line: 28, column: 3, scope: !116) -!125 = distinct !{!125, !117, !126} -!126 = !DILocation(line: 30, column: 3, scope: !109) -!130 = distinct !DILexicalBlock(scope: !111, file: !6, line: 32, column: 3) -!131 = !DILocation(line: 32, column: 3, scope: !111) -!138 = !DILocation(line: 32, column: 3, scope: !130) -!139 = distinct !{!139, !131, !140} -!140 = !DILocation(line: 34, column: 3, scope: !111) -!142 = distinct !DISubprogram(name: "negative_dependence", scope: !6, file: !6, line: 37, type: !17, scopeLine: 37, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !143) -!143 = !{!144, !145, !147} -!144 = !DILocalVariable(name: "A", arg: 1, scope: !142, file: !6, line: 37, type: !19) -!145 = !DILocalVariable(name: "i", scope: !146, file: !6, line: 38, type: !8) -!146 = distinct !DILexicalBlock(scope: !142, file: !6, line: 38, column: 3) -!147 = !DILocalVariable(name: "i", scope: !148, file: !6, line: 42, type: !8) -!148 = distinct !DILexicalBlock(scope: !142, file: !6, line: 42, column: 3) -!153 = distinct !DILexicalBlock(scope: !146, file: !6, line: 38, column: 3) -!154 = !DILocation(line: 38, column: 3, scope: !146) -!160 = !DILocation(line: 38, column: 3, scope: !153) -!161 = distinct 
!{!161, !154, !162} -!162 = !DILocation(line: 40, column: 3, scope: !146) -!163 = !DILocation(line: 0, scope: !148) -!164 = !DILocation(line: 42, column: 8, scope: !148) -!178 = !DILocation(line: 45, column: 1, scope: !142) -!179 = distinct !DISubprogram(name: "sumTest", scope: !6, file: !6, line: 48, type: !180, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !182) -!180 = !DISubroutineType(types: !181) -!181 = !{!8, !19} -!182 = !{!183, !184, !185, !187} -!183 = !DILocalVariable(name: "A", arg: 1, scope: !179, file: !6, line: 48, type: !19) -!184 = !DILocalVariable(name: "sum", scope: !179, file: !6, line: 49, type: !8) -!185 = !DILocalVariable(name: "i", scope: !186, file: !6, line: 51, type: !8) -!186 = distinct !DILexicalBlock(scope: !179, file: !6, line: 51, column: 3) -!187 = !DILocalVariable(name: "i", scope: !188, file: !6, line: 54, type: !8) -!188 = distinct !DILexicalBlock(scope: !179, file: !6, line: 54, column: 3) -!193 = distinct !DILexicalBlock(scope: !186, file: !6, line: 51, column: 3) -!194 = !DILocation(line: 51, column: 3, scope: !186) -!199 = !DILocation(line: 51, column: 3, scope: !193) -!200 = distinct !{!200, !194, !201} -!201 = !DILocation(line: 52, column: 15, scope: !186) -!215 = !DILocation(line: 57, column: 3, scope: !179) +!6 = !DICompositeType(tag: DW_TAG_array_type, baseType: !7, size: 32768, elements: !8) +!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!8 = !{!9} +!9 = !DISubrange(count: 1024) +!10 = !{i32 2, !"Dwarf Version", i32 4} +!11 = !{i32 2, !"Debug Info Version", i32 3} +!12 = !{i32 1, !"wchar_size", i32 4} +!13 = !{i32 7, !"PIC Level", i32 2} +!14 = distinct !DISubprogram(name: "non_adjacent", scope: !3, file: !3, line: 17, type: !15, scopeLine: 17, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !19) +!15 = !DISubroutineType(types: 
!16) +!16 = !{null, !17} +!17 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !18) +!18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !7, size: 64) +!19 = !{!20, !21, !24} +!20 = !DILocalVariable(name: "A", arg: 1, scope: !14, file: !3, line: 17, type: !17) +!21 = !DILocalVariable(name: "i", scope: !22, file: !3, line: 18, type: !23) +!22 = distinct !DILexicalBlock(scope: !14, file: !3, line: 18, column: 3) +!23 = !DIBasicType(name: "long int", size: 64, encoding: DW_ATE_signed) +!24 = !DILocalVariable(name: "i", scope: !25, file: !3, line: 22, type: !23) +!25 = distinct !DILexicalBlock(scope: !14, file: !3, line: 22, column: 3) +!26 = !DILocation(line: 18, column: 30, scope: !27) +!27 = distinct !DILexicalBlock(scope: !22, file: !3, line: 18, column: 3) +!28 = distinct !{!28, !29, !30} +!29 = !DILocation(line: 18, column: 3, scope: !22) +!30 = !DILocation(line: 20, column: 3, scope: !22) +!31 = !DILocation(line: 22, column: 30, scope: !32) +!32 = distinct !DILexicalBlock(scope: !25, file: !3, line: 22, column: 3) +!33 = distinct !{!33, !34, !35} +!34 = !DILocation(line: 22, column: 3, scope: !25) +!35 = !DILocation(line: 24, column: 3, scope: !25) +!36 = distinct !DISubprogram(name: "different_bounds", scope: !3, file: !3, line: 27, type: !15, scopeLine: 27, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !37) +!37 = !{!38, !39, !41} +!38 = !DILocalVariable(name: "A", arg: 1, scope: !36, file: !3, line: 27, type: !17) +!39 = !DILocalVariable(name: "i", scope: !40, file: !3, line: 28, type: !23) +!40 = distinct !DILexicalBlock(scope: !36, file: !3, line: 28, column: 3) +!41 = !DILocalVariable(name: "i", scope: !42, file: !3, line: 32, type: !23) +!42 = distinct !DILexicalBlock(scope: !36, file: !3, line: 32, column: 3) +!43 = !DILocation(line: 28, column: 30, scope: !44) +!44 = distinct !DILexicalBlock(scope: !40, file: !3, line: 28, column: 3) +!45 = distinct !{!45, !46, !47} 
+!46 = !DILocation(line: 28, column: 3, scope: !40) +!47 = !DILocation(line: 30, column: 3, scope: !40) +!48 = distinct !{!48, !49, !50} +!49 = !DILocation(line: 32, column: 3, scope: !42) +!50 = !DILocation(line: 34, column: 3, scope: !42) +!51 = distinct !DISubprogram(name: "negative_dependence", scope: !3, file: !3, line: 37, type: !15, scopeLine: 37, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !52) +!52 = !{!53, !54, !56} +!53 = !DILocalVariable(name: "A", arg: 1, scope: !51, file: !3, line: 37, type: !17) +!54 = !DILocalVariable(name: "i", scope: !55, file: !3, line: 38, type: !7) +!55 = distinct !DILexicalBlock(scope: !51, file: !3, line: 38, column: 3) +!56 = !DILocalVariable(name: "i", scope: !57, file: !3, line: 42, type: !7) +!57 = distinct !DILexicalBlock(scope: !51, file: !3, line: 42, column: 3) +!58 = distinct !{!58, !59, !60} +!59 = !DILocation(line: 38, column: 3, scope: !55) +!60 = !DILocation(line: 40, column: 3, scope: !55) +!61 = !DILocation(line: 0, scope: !57) +!62 = !DILocation(line: 45, column: 1, scope: !51) +!63 = distinct !DISubprogram(name: "sumTest", scope: !3, file: !3, line: 48, type: !64, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !66) +!64 = !DISubroutineType(types: !65) +!65 = !{!7, !17} +!66 = !{!67, !68, !69, !71} +!67 = !DILocalVariable(name: "A", arg: 1, scope: !63, file: !3, line: 48, type: !17) +!68 = !DILocalVariable(name: "sum", scope: !63, file: !3, line: 49, type: !7) +!69 = !DILocalVariable(name: "i", scope: !70, file: !3, line: 51, type: !7) +!70 = distinct !DILexicalBlock(scope: !63, file: !3, line: 51, column: 3) +!71 = !DILocalVariable(name: "i", scope: !72, file: !3, line: 54, type: !7) +!72 = distinct !DILexicalBlock(scope: !63, file: !3, line: 54, column: 3) +!73 = distinct !{!73, !74, !75} +!74 = !DILocation(line: 51, column: 3, scope: 
!70) +!75 = !DILocation(line: 52, column: 15, scope: !70) +!76 = !DILocation(line: 57, column: 3, scope: !63) diff --git a/llvm/test/Transforms/LoopFusion/four_loops.ll b/llvm/test/Transforms/LoopFusion/four_loops.ll --- a/llvm/test/Transforms/LoopFusion/four_loops.ll +++ b/llvm/test/Transforms/LoopFusion/four_loops.ll @@ -9,127 +9,113 @@ ; CHECK-NEXT: bb: ; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]+]] ; CHECK: [[LOOP1HEADER]] -; CHECK: br i1 %exitcond12, label %[[LOOP1BODY:bb[0-9]+]], label %[[LOOP2PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP1BODY]] ; CHECK: br label %[[LOOP1LATCH:bb[0-9]+]] ; CHECK: [[LOOP1LATCH]] -; CHECK: br label %[[LOOP2PREHEADER]] -; CHECK: [[LOOP2PREHEADER]] -; CHECK: br i1 %exitcond9, label %[[LOOP2HEADER:bb[0-9]+]], label %[[LOOP3PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP2HEADER]] +; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]+]], label %[[LOOP2BODY]] +; CHECK: [[LOOP2BODY]] ; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]] ; CHECK: [[LOOP2LATCH]] -; CHECK: br label %[[LOOP3PREHEADER]] -; CHECK: [[LOOP3PREHEADER]] -; CHECK: br i1 %exitcond6, label %[[LOOP3HEADER:bb[0-9]+]], label %[[LOOP4PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP3HEADER]] +; CHECK: br i1 %{{.*}}, label %[[LOOP3BODY:bb[0-9]+]], label %[[LOOP3BODY]] +; CHECK: [[LOOP3BODY]] ; CHECK: br label %[[LOOP3LATCH:bb[0-9]+]] ; CHECK: [[LOOP3LATCH]] -; CHECK: br label %[[LOOP4PREHEADER]] -; CHECK: [[LOOP4PREHEADER]] -; CHECK: br i1 %exitcond, label %[[LOOP4HEADER:bb[0-9]+]], label %[[LOOP4EXIT:bb[0-9]+]] -; CHECK: [[LOOP4EXIT]] -; CHECK: br label %[[FUNCEXIT:bb[0-9]+]] -; CHECK: [[LOOP4HEADER]] +; CHECK: br i1 %{{.*}}, label %[[LOOP4BODY:bb[0-9]+]], label %[[LOOP4BODY]] +; CHECK: [[LOOP4BODY]] ; CHECK: br label %[[LOOP4LATCH:bb[0-9]+]] ; CHECK: [[LOOP4LATCH]] -; CHECK: br label %[[LOOP1HEADER]] -; CHECK: [[FUNCEXIT]] +; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[LOOPEXIT:bb[0-9]+]] ; CHECK: ret void define void @dep_free() { bb: - br label %bb13 + br label %bb15 -bb13: ; preds = %bb22, %bb 
- %indvars.iv10 = phi i64 [ %indvars.iv.next11, %bb22 ], [ 0, %bb ] - %.0 = phi i32 [ 0, %bb ], [ %tmp23, %bb22 ] - %exitcond12 = icmp ne i64 %indvars.iv10, 100 - br i1 %exitcond12, label %bb15, label %bb25 +bb25.preheader: ; preds = %bb22 + br label %bb27 -bb15: ; preds = %bb13 - %tmp = add nsw i32 %.0, -3 - %tmp16 = add nuw nsw i64 %indvars.iv10, 3 +bb15: ; preds = %bb, %bb22 + %.08 = phi i32 [ 0, %bb ], [ %tmp23, %bb22 ] + %indvars.iv107 = phi i64 [ 0, %bb ], [ %indvars.iv.next11, %bb22 ] + %tmp = add nsw i32 %.08, -3 + %tmp16 = add nuw nsw i64 %indvars.iv107, 3 %tmp17 = trunc i64 %tmp16 to i32 %tmp18 = mul nsw i32 %tmp, %tmp17 - %tmp19 = trunc i64 %indvars.iv10 to i32 + %tmp19 = trunc i64 %indvars.iv107 to i32 %tmp20 = srem i32 %tmp18, %tmp19 - %tmp21 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv10 + %tmp21 = getelementptr inbounds [1024 x i32], [1024 x i32]* @A, i64 0, i64 %indvars.iv107 store i32 %tmp20, i32* %tmp21, align 4 br label %bb22 bb22: ; preds = %bb15 - %indvars.iv.next11 = add nuw nsw i64 %indvars.iv10, 1 - %tmp23 = add nuw nsw i32 %.0, 1 - br label %bb13 - -bb25: ; preds = %bb35, %bb13 - %indvars.iv7 = phi i64 [ %indvars.iv.next8, %bb35 ], [ 0, %bb13 ] - %.01 = phi i32 [ 0, %bb13 ], [ %tmp36, %bb35 ] - %exitcond9 = icmp ne i64 %indvars.iv7, 100 - br i1 %exitcond9, label %bb27, label %bb38 - -bb27: ; preds = %bb25 - %tmp28 = add nsw i32 %.01, -3 - %tmp29 = add nuw nsw i64 %indvars.iv7, 3 + %indvars.iv.next11 = add nuw nsw i64 %indvars.iv107, 1 + %tmp23 = add nuw nsw i32 %.08, 1 + %exitcond12 = icmp ne i64 %indvars.iv.next11, 100 + br i1 %exitcond12, label %bb15, label %bb25.preheader + +bb38.preheader: ; preds = %bb35 + br label %bb40 + +bb27: ; preds = %bb25.preheader, %bb35 + %.016 = phi i32 [ 0, %bb25.preheader ], [ %tmp36, %bb35 ] + %indvars.iv75 = phi i64 [ 0, %bb25.preheader ], [ %indvars.iv.next8, %bb35 ] + %tmp28 = add nsw i32 %.016, -3 + %tmp29 = add nuw nsw i64 %indvars.iv75, 3 %tmp30 = trunc i64 %tmp29 
to i32 %tmp31 = mul nsw i32 %tmp28, %tmp30 - %tmp32 = trunc i64 %indvars.iv7 to i32 + %tmp32 = trunc i64 %indvars.iv75 to i32 %tmp33 = srem i32 %tmp31, %tmp32 - %tmp34 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv7 + %tmp34 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv75 store i32 %tmp33, i32* %tmp34, align 4 br label %bb35 bb35: ; preds = %bb27 - %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1 - %tmp36 = add nuw nsw i32 %.01, 1 - br label %bb25 - -bb38: ; preds = %bb48, %bb25 - %indvars.iv4 = phi i64 [ %indvars.iv.next5, %bb48 ], [ 0, %bb25 ] - %.02 = phi i32 [ 0, %bb25 ], [ %tmp49, %bb48 ] - %exitcond6 = icmp ne i64 %indvars.iv4, 100 - br i1 %exitcond6, label %bb40, label %bb51 - -bb40: ; preds = %bb38 - %tmp41 = add nsw i32 %.02, -3 - %tmp42 = add nuw nsw i64 %indvars.iv4, 3 + %indvars.iv.next8 = add nuw nsw i64 %indvars.iv75, 1 + %tmp36 = add nuw nsw i32 %.016, 1 + %exitcond9 = icmp ne i64 %indvars.iv.next8, 100 + br i1 %exitcond9, label %bb27, label %bb38.preheader + +bb51.preheader: ; preds = %bb48 + br label %bb53 + +bb40: ; preds = %bb38.preheader, %bb48 + %.024 = phi i32 [ 0, %bb38.preheader ], [ %tmp49, %bb48 ] + %indvars.iv43 = phi i64 [ 0, %bb38.preheader ], [ %indvars.iv.next5, %bb48 ] + %tmp41 = add nsw i32 %.024, -3 + %tmp42 = add nuw nsw i64 %indvars.iv43, 3 %tmp43 = trunc i64 %tmp42 to i32 %tmp44 = mul nsw i32 %tmp41, %tmp43 - %tmp45 = trunc i64 %indvars.iv4 to i32 + %tmp45 = trunc i64 %indvars.iv43 to i32 %tmp46 = srem i32 %tmp44, %tmp45 - %tmp47 = getelementptr inbounds [1024 x i32], [1024 x i32]* @C, i64 0, i64 %indvars.iv4 + %tmp47 = getelementptr inbounds [1024 x i32], [1024 x i32]* @C, i64 0, i64 %indvars.iv43 store i32 %tmp46, i32* %tmp47, align 4 br label %bb48 bb48: ; preds = %bb40 - %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1 - %tmp49 = add nuw nsw i32 %.02, 1 - br label %bb38 + %indvars.iv.next5 = add nuw nsw i64 %indvars.iv43, 1 + %tmp49 = add nuw nsw i32 
%.024, 1 + %exitcond6 = icmp ne i64 %indvars.iv.next5, 100 + br i1 %exitcond6, label %bb40, label %bb51.preheader -bb51: ; preds = %bb61, %bb38 - %indvars.iv = phi i64 [ %indvars.iv.next, %bb61 ], [ 0, %bb38 ] - %.03 = phi i32 [ 0, %bb38 ], [ %tmp62, %bb61 ] - %exitcond = icmp ne i64 %indvars.iv, 100 - br i1 %exitcond, label %bb53, label %bb52 - -bb52: ; preds = %bb51 +bb52: ; preds = %bb61 br label %bb63 -bb53: ; preds = %bb51 - %tmp54 = add nsw i32 %.03, -3 - %tmp55 = add nuw nsw i64 %indvars.iv, 3 +bb53: ; preds = %bb51.preheader, %bb61 + %.032 = phi i32 [ 0, %bb51.preheader ], [ %tmp62, %bb61 ] + %indvars.iv1 = phi i64 [ 0, %bb51.preheader ], [ %indvars.iv.next, %bb61 ] + %tmp54 = add nsw i32 %.032, -3 + %tmp55 = add nuw nsw i64 %indvars.iv1, 3 %tmp56 = trunc i64 %tmp55 to i32 %tmp57 = mul nsw i32 %tmp54, %tmp56 - %tmp58 = trunc i64 %indvars.iv to i32 + %tmp58 = trunc i64 %indvars.iv1 to i32 %tmp59 = srem i32 %tmp57, %tmp58 - %tmp60 = getelementptr inbounds [1024 x i32], [1024 x i32]* @D, i64 0, i64 %indvars.iv + %tmp60 = getelementptr inbounds [1024 x i32], [1024 x i32]* @D, i64 0, i64 %indvars.iv1 store i32 %tmp59, i32* %tmp60, align 4 br label %bb61 bb61: ; preds = %bb53 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %tmp62 = add nuw nsw i32 %.03, 1 - br label %bb51 + %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1 + %tmp62 = add nuw nsw i32 %.032, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 100 + br i1 %exitcond, label %bb53, label %bb52 bb63: ; preds = %bb52 ret void diff --git a/llvm/test/Transforms/LoopFusion/loop_nest.ll b/llvm/test/Transforms/LoopFusion/loop_nest.ll --- a/llvm/test/Transforms/LoopFusion/loop_nest.ll +++ b/llvm/test/Transforms/LoopFusion/loop_nest.ll @@ -23,24 +23,21 @@ ; CHECK-NEXT: bb: ; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]+]] ; CHECK: [[LOOP1HEADER]] -; CHECK: br i1 %exitcond12, label %[[LOOP3PREHEADER:bb[0-9]+.preheader]], label %[[LOOP2HEADER:bb[0-9]+]] -; CHECK: [[LOOP3PREHEADER]] ; CHECK: br label 
%[[LOOP3HEADER:bb[0-9]+]] ; CHECK: [[LOOP3HEADER]] -; CHECK: br i1 %exitcond9, label %[[LOOP3BODY:bb[0-9]+]], label %[[LOOP1LATCH:bb[0-9]+]] +; CHECK: br label %[[LOOP3LATCH:bb[0-9]+]] +; CHECK: [[LOOP3LATCH]] +; CHECK: br i1 %{{.*}}, label %[[LOOP3HEADER]], label %[[LOOP1LATCH:bb[0-9]+]] ; CHECK: [[LOOP1LATCH]] -; CHECK: br label %[[LOOP2HEADER:bb[0-9]+]] -; CHECK: [[LOOP2HEADER]] -; CHECK: br i1 %exitcond6, label %[[LOOP4PREHEADER:bb[0-9]+.preheader]], label %[[LOOP2EXITBLOCK:bb[0-9]+]] -; CHECK: [[LOOP4PREHEADER]] +; CHECK: br i1 %{{.*}}, label %[[LOOP2PREHEADER:bb[0-9]+]], label %[[LOOP2PREHEADER]] +; CHECK: [[LOOP2PREHEADER]] ; CHECK: br label %[[LOOP4HEADER:bb[0-9]+]] -; CHECK: [[LOOP2EXITBLOCK]] -; CHECK-NEXT: br label %[[FUNCEXIT:bb[0-9]+]] ; CHECK: [[LOOP4HEADER]] -; CHECK: br i1 %exitcond, label %[[LOOP4BODY:bb[0-9]+]], label %[[LOOP2LATCH:bb[0-9]+]] +; CHECK: br label %[[LOOP4LATCH:bb[0-9]+]] +; CHECK: [[LOOP4LATCH]] +; CHECK: br i1 %{{.*}}, label %[[LOOP4HEADER]], label %[[LOOP2LATCH:bb[0-9]+]] ; CHECK: [[LOOP2LATCH]] -; CHECK: br label %[[LOOP1HEADER:bb[0-9]+]] -; CHECK: [[FUNCEXIT]] +; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[LOOP1EXIT:bb[0-9]*]] ; CHECK: ret void ; TODO: The current version of loop fusion does not allow the inner loops to be @@ -48,72 +45,69 @@ ; limitations that can be addressed in future improvements to fusion. 
define void @dep_free() { bb: - br label %bb13 + br label %bb16 -bb13: ; preds = %bb27, %bb - %indvars.iv10 = phi i64 [ %indvars.iv.next11, %bb27 ], [ 0, %bb ] - %.0 = phi i32 [ 0, %bb ], [ %tmp28, %bb27 ] - %exitcond12 = icmp ne i64 %indvars.iv10, 100 - br i1 %exitcond12, label %bb16, label %bb30 +bb16: ; preds = %bb, %bb27 + %.06 = phi i32 [ 0, %bb ], [ %tmp28, %bb27 ] + %indvars.iv105 = phi i64 [ 0, %bb ], [ %indvars.iv.next11, %bb27 ] + br label %bb18 -bb16: ; preds = %bb25, %bb13 - %indvars.iv7 = phi i64 [ %indvars.iv.next8, %bb25 ], [ 0, %bb13 ] - %exitcond9 = icmp ne i64 %indvars.iv7, 100 - br i1 %exitcond9, label %bb18, label %bb27 +bb30: ; preds = %bb27 + br label %bb33 -bb18: ; preds = %bb16 - %tmp = add nsw i32 %.0, -3 - %tmp19 = add nuw nsw i64 %indvars.iv10, 3 +bb18: ; preds = %bb16, %bb25 + %indvars.iv74 = phi i64 [ 0, %bb16 ], [ %indvars.iv.next8, %bb25 ] + %tmp = add nsw i32 %.06, -3 + %tmp19 = add nuw nsw i64 %indvars.iv105, 3 %tmp20 = trunc i64 %tmp19 to i32 %tmp21 = mul nsw i32 %tmp, %tmp20 - %tmp22 = trunc i64 %indvars.iv10 to i32 + %tmp22 = trunc i64 %indvars.iv105 to i32 %tmp23 = srem i32 %tmp21, %tmp22 - %tmp24 = getelementptr inbounds [1024 x [1024 x i32]], [1024 x [1024 x i32]]* @A, i64 0, i64 %indvars.iv10, i64 %indvars.iv7 + %tmp24 = getelementptr inbounds [1024 x [1024 x i32]], [1024 x [1024 x i32]]* @A, i64 0, i64 %indvars.iv105, i64 %indvars.iv74 store i32 %tmp23, i32* %tmp24, align 4 br label %bb25 bb25: ; preds = %bb18 - %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1 - br label %bb16 + %indvars.iv.next8 = add nuw nsw i64 %indvars.iv74, 1 + %exitcond9 = icmp ne i64 %indvars.iv.next8, 100 + br i1 %exitcond9, label %bb18, label %bb27 -bb27: ; preds = %bb16 - %indvars.iv.next11 = add nuw nsw i64 %indvars.iv10, 1 - %tmp28 = add nuw nsw i32 %.0, 1 - br label %bb13 +bb27: ; preds = %bb25 + %indvars.iv.next11 = add nuw nsw i64 %indvars.iv105, 1 + %tmp28 = add nuw nsw i32 %.06, 1 + %exitcond12 = icmp ne i64 %indvars.iv.next11, 100 + br 
i1 %exitcond12, label %bb16, label %bb30 -bb30: ; preds = %bb45, %bb13 - %indvars.iv4 = phi i64 [ %indvars.iv.next5, %bb45 ], [ 0, %bb13 ] - %.02 = phi i32 [ 0, %bb13 ], [ %tmp46, %bb45 ] - %exitcond6 = icmp ne i64 %indvars.iv4, 100 - br i1 %exitcond6, label %bb33, label %bb31 +bb33: ; preds = %bb30, %bb45 + %.023 = phi i32 [ 0, %bb30 ], [ %tmp46, %bb45 ] + %indvars.iv42 = phi i64 [ 0, %bb30 ], [ %indvars.iv.next5, %bb45 ] + br label %bb35 -bb31: ; preds = %bb30 +bb31: ; preds = %bb45 br label %bb47 -bb33: ; preds = %bb43, %bb30 - %indvars.iv = phi i64 [ %indvars.iv.next, %bb43 ], [ 0, %bb30 ] - %exitcond = icmp ne i64 %indvars.iv, 100 - br i1 %exitcond, label %bb35, label %bb45 - -bb35: ; preds = %bb33 - %tmp36 = add nsw i32 %.02, -3 - %tmp37 = add nuw nsw i64 %indvars.iv4, 3 +bb35: ; preds = %bb33, %bb43 + %indvars.iv1 = phi i64 [ 0, %bb33 ], [ %indvars.iv.next, %bb43 ] + %tmp36 = add nsw i32 %.023, -3 + %tmp37 = add nuw nsw i64 %indvars.iv42, 3 %tmp38 = trunc i64 %tmp37 to i32 %tmp39 = mul nsw i32 %tmp36, %tmp38 - %tmp40 = trunc i64 %indvars.iv4 to i32 + %tmp40 = trunc i64 %indvars.iv42 to i32 %tmp41 = srem i32 %tmp39, %tmp40 - %tmp42 = getelementptr inbounds [1024 x [1024 x i32]], [1024 x [1024 x i32]]* @B, i64 0, i64 %indvars.iv4, i64 %indvars.iv + %tmp42 = getelementptr inbounds [1024 x [1024 x i32]], [1024 x [1024 x i32]]* @B, i64 0, i64 %indvars.iv42, i64 %indvars.iv1 store i32 %tmp41, i32* %tmp42, align 4 br label %bb43 bb43: ; preds = %bb35 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - br label %bb33 + %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 100 + br i1 %exitcond, label %bb35, label %bb45 -bb45: ; preds = %bb33 - %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1 - %tmp46 = add nuw nsw i32 %.02, 1 - br label %bb30 +bb45: ; preds = %bb43 + %indvars.iv.next5 = add nuw nsw i64 %indvars.iv42, 1 + %tmp46 = add nuw nsw i32 %.023, 1 + %exitcond6 = icmp ne i64 %indvars.iv.next5, 100 + br i1 
%exitcond6, label %bb33, label %bb31 bb47: ; preds = %bb31 ret void diff --git a/llvm/test/Transforms/LoopFusion/simple.ll b/llvm/test/Transforms/LoopFusion/simple.ll --- a/llvm/test/Transforms/LoopFusion/simple.ll +++ b/llvm/test/Transforms/LoopFusion/simple.ll @@ -6,68 +6,61 @@ ; CHECK-NEXT: bb: ; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]*]] ; CHECK: [[LOOP1HEADER]] -; CHECK: br i1 %{{.*}}, label %[[LOOP1BODY:bb[0-9]*]], label %[[LOOP2PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP1BODY]] ; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]] ; CHECK: [[LOOP1LATCH]] -; CHECK: br label %[[LOOP2PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP2PREHEADER]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]*]], label %[[LOOP2EXIT:bb[0-9]*]] -; CHECK: [[LOOP2BODY]] +; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]+]], label %[[LOOP2HEADER]] +; CHECK: [[LOOP2HEADER]] ; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]] ; CHECK: [[LOOP2LATCH]] -; CHECK: br label %[[LOOP1HEADER]] +; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %{{.*}} ; CHECK: ret void define void @dep_free(i32* noalias %arg) { bb: - br label %bb5 + br label %bb7 -bb5: ; preds = %bb14, %bb - %indvars.iv2 = phi i64 [ %indvars.iv.next3, %bb14 ], [ 0, %bb ] - %.01 = phi i32 [ 0, %bb ], [ %tmp15, %bb14 ] - %exitcond4 = icmp ne i64 %indvars.iv2, 100 - br i1 %exitcond4, label %bb7, label %bb17 - -bb7: ; preds = %bb5 - %tmp = add nsw i32 %.01, -3 - %tmp8 = add nuw nsw i64 %indvars.iv2, 3 +bb7: ; preds = %bb, %bb14 + %.014 = phi i32 [ 0, %bb ], [ %tmp15, %bb14 ] + %indvars.iv23 = phi i64 [ 0, %bb ], [ %indvars.iv.next3, %bb14 ] + %tmp = add nsw i32 %.014, -3 + %tmp8 = add nuw nsw i64 %indvars.iv23, 3 %tmp9 = trunc i64 %tmp8 to i32 %tmp10 = mul nsw i32 %tmp, %tmp9 - %tmp11 = trunc i64 %indvars.iv2 to i32 + %tmp11 = trunc i64 %indvars.iv23 to i32 %tmp12 = srem i32 %tmp10, %tmp11 - %tmp13 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv2 + %tmp13 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv23 store i32 %tmp12, i32* %tmp13, 
align 4 br label %bb14 bb14: ; preds = %bb7 - %indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1 - %tmp15 = add nuw nsw i32 %.01, 1 - br label %bb5 - -bb17: ; preds = %bb27, %bb5 - %indvars.iv = phi i64 [ %indvars.iv.next, %bb27 ], [ 0, %bb5 ] - %.0 = phi i32 [ 0, %bb5 ], [ %tmp28, %bb27 ] - %exitcond = icmp ne i64 %indvars.iv, 100 - br i1 %exitcond, label %bb19, label %bb18 - -bb18: ; preds = %bb17 - br label %bb29 - -bb19: ; preds = %bb17 - %tmp20 = add nsw i32 %.0, -3 - %tmp21 = add nuw nsw i64 %indvars.iv, 3 + %indvars.iv.next3 = add nuw nsw i64 %indvars.iv23, 1 + %tmp15 = add nuw nsw i32 %.014, 1 + %exitcond4 = icmp ne i64 %indvars.iv.next3, 100 + br i1 %exitcond4, label %bb7, label %bb17.preheader + +bb17.preheader: ; preds = %bb14 + br label %bb19 + +bb19: ; preds = %bb17.preheader, %bb27 + %.02 = phi i32 [ 0, %bb17.preheader ], [ %tmp28, %bb27 ] + %indvars.iv1 = phi i64 [ 0, %bb17.preheader ], [ %indvars.iv.next, %bb27 ] + %tmp20 = add nsw i32 %.02, -3 + %tmp21 = add nuw nsw i64 %indvars.iv1, 3 %tmp22 = trunc i64 %tmp21 to i32 %tmp23 = mul nsw i32 %tmp20, %tmp22 - %tmp24 = trunc i64 %indvars.iv to i32 + %tmp24 = trunc i64 %indvars.iv1 to i32 %tmp25 = srem i32 %tmp23, %tmp24 - %tmp26 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv + %tmp26 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv1 store i32 %tmp25, i32* %tmp26, align 4 br label %bb27 bb27: ; preds = %bb19 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %tmp28 = add nuw nsw i32 %.0, 1 - br label %bb17 + %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1 + %tmp28 = add nuw nsw i32 %.02, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 100 + br i1 %exitcond, label %bb19, label %bb18 + +bb18: ; preds = %bb27 + br label %bb29 bb29: ; preds = %bb18 ret void @@ -75,64 +68,58 @@ ; CHECK: void @dep_free_parametric ; CHECK-NEXT: bb: -; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]*]] +; CHECK: br i1 %{{.*}}, label 
%[[LOOP1PREHEADER:bb[0-9.a-z]*]], label %[[EXITBLOCK:bb[0-9]*]] +; CHECK: [[LOOP1PREHEADER]] +; CHECK: br label %[[LOOP1HEADER:bb[0-9]*]] ; CHECK: [[LOOP1HEADER]] -; CHECK: br i1 %{{.*}}, label %[[LOOP1BODY:bb[0-9]*]], label %[[LOOP2PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP1BODY]] ; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]] ; CHECK: [[LOOP1LATCH]] -; CHECK: br label %[[LOOP2PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP2PREHEADER]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]*]], label %[[LOOP2EXIT:bb[0-9]*]] -; CHECK: [[LOOP2BODY]] +; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]*]], label %[[LOOP2HEADER]] +; CHECK: [[LOOP2HEADER]] ; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]] ; CHECK: [[LOOP2LATCH]] -; CHECK: br label %[[LOOP1HEADER]] +; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[EXITBLOCK]] ; CHECK: ret void define void @dep_free_parametric(i32* noalias %arg, i64 %arg2) { bb: - br label %bb3 + %tmp3 = icmp slt i64 0, %arg2 + br i1 %tmp3, label %bb5, label %bb15.preheader -bb3: ; preds = %bb12, %bb - %.01 = phi i64 [ 0, %bb ], [ %tmp13, %bb12 ] - %tmp = icmp slt i64 %.01, %arg2 - br i1 %tmp, label %bb5, label %bb15 - -bb5: ; preds = %bb3 - %tmp6 = add nsw i64 %.01, -3 - %tmp7 = add nuw nsw i64 %.01, 3 +bb5: ; preds = %bb, %bb12 + %.014 = phi i64 [ 0, %bb ], [ %tmp13, %bb12 ] + %tmp6 = add nsw i64 %.014, -3 + %tmp7 = add nuw nsw i64 %.014, 3 %tmp8 = mul nsw i64 %tmp6, %tmp7 - %tmp9 = srem i64 %tmp8, %.01 + %tmp9 = srem i64 %tmp8, %.014 %tmp10 = trunc i64 %tmp9 to i32 - %tmp11 = getelementptr inbounds i32, i32* %arg, i64 %.01 + %tmp11 = getelementptr inbounds i32, i32* %arg, i64 %.014 store i32 %tmp10, i32* %tmp11, align 4 br label %bb12 bb12: ; preds = %bb5 - %tmp13 = add nuw nsw i64 %.01, 1 - br label %bb3 - -bb15: ; preds = %bb25, %bb3 - %.0 = phi i64 [ 0, %bb3 ], [ %tmp26, %bb25 ] - %tmp16 = icmp slt i64 %.0, %arg2 - br i1 %tmp16, label %bb18, label %bb17 - -bb17: ; preds = %bb15 - br label %bb27 - -bb18: ; preds = %bb15 - %tmp19 = add nsw i64 %.0, -3 - 
%tmp20 = add nuw nsw i64 %.0, 3 + %tmp13 = add nuw nsw i64 %.014, 1 + %tmp = icmp slt i64 %tmp13, %arg2 + br i1 %tmp, label %bb5, label %bb15.preheader + +bb15.preheader: ; preds = %bb12, %bb + %tmp161 = icmp slt i64 0, %arg2 + br i1 %tmp161, label %bb18, label %bb27 + +bb18: ; preds = %bb15.preheader, %bb25 + %.02 = phi i64 [ 0, %bb15.preheader ], [ %tmp26, %bb25 ] + %tmp19 = add nsw i64 %.02, -3 + %tmp20 = add nuw nsw i64 %.02, 3 %tmp21 = mul nsw i64 %tmp19, %tmp20 - %tmp22 = srem i64 %tmp21, %.0 + %tmp22 = srem i64 %tmp21, %.02 %tmp23 = trunc i64 %tmp22 to i32 - %tmp24 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %.0 + %tmp24 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %.02 store i32 %tmp23, i32* %tmp24, align 4 br label %bb25 bb25: ; preds = %bb18 - %tmp26 = add nuw nsw i64 %.0, 1 - br label %bb15 + %tmp26 = add nuw nsw i64 %.02, 1 + %tmp16 = icmp slt i64 %tmp26, %arg2 + br i1 %tmp16, label %bb18, label %bb27 bb27: ; preds = %bb17 ret void @@ -142,113 +129,87 @@ ; CHECK-NEXT: bb: ; CHECK-NEXT: br label %[[LOOP1HEADER:bb[0-9]*]] ; CHECK: [[LOOP1HEADER]] -; CHECK: br i1 %{{.*}}, label %[[LOOP1BODY:bb[0-9]*]], label %[[LOOP2PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP1BODY]] ; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]] ; CHECK: [[LOOP1LATCH]] -; CHECK: br label %[[LOOP2PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP2PREHEADER]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]*]], label %[[LOOP2EXIT:bb[0-9]*]] -; CHECK: [[LOOP2BODY]] +; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]+]], label %[[LOOP2HEADER]] +; CHECK: [[LOOP2HEADER]] ; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]] ; CHECK: [[LOOP2LATCH]] -; CHECK: br label %[[LOOP1HEADER]] +; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %{{.*}} ; CHECK: ret void define void @raw_only(i32* noalias %arg) { bb: - br label %bb5 + br label %bb7 -bb5: ; preds = %bb9, %bb - %indvars.iv2 = phi i64 [ %indvars.iv.next3, %bb9 ], [ 0, %bb ] - %exitcond4 = icmp ne i64 %indvars.iv2, 100 - br 
i1 %exitcond4, label %bb7, label %bb11 +bb11.preheader: ; preds = %bb9 + br label %bb13 -bb7: ; preds = %bb5 - %tmp = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv2 - %tmp8 = trunc i64 %indvars.iv2 to i32 +bb7: ; preds = %bb, %bb9 + %indvars.iv22 = phi i64 [ 0, %bb ], [ %indvars.iv.next3, %bb9 ] + %tmp = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv22 + %tmp8 = trunc i64 %indvars.iv22 to i32 store i32 %tmp8, i32* %tmp, align 4 br label %bb9 bb9: ; preds = %bb7 - %indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1 - br label %bb5 - -bb11: ; preds = %bb18, %bb5 - %indvars.iv = phi i64 [ %indvars.iv.next, %bb18 ], [ 0, %bb5 ] - %exitcond = icmp ne i64 %indvars.iv, 100 - br i1 %exitcond, label %bb13, label %bb19 + %indvars.iv.next3 = add nuw nsw i64 %indvars.iv22, 1 + %exitcond4 = icmp ne i64 %indvars.iv.next3, 100 + br i1 %exitcond4, label %bb7, label %bb11.preheader -bb13: ; preds = %bb11 - %tmp14 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv +bb13: ; preds = %bb11.preheader, %bb18 + %indvars.iv1 = phi i64 [ 0, %bb11.preheader ], [ %indvars.iv.next, %bb18 ] + %tmp14 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv1 %tmp15 = load i32, i32* %tmp14, align 4 %tmp16 = shl nsw i32 %tmp15, 1 - %tmp17 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv + %tmp17 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv1 store i32 %tmp16, i32* %tmp17, align 4 br label %bb18 bb18: ; preds = %bb13 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - br label %bb11 + %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 100 br i1 %exitcond, label %bb13, label %bb19 -bb19: ; preds = %bb11 +bb19: ; preds = %bb18 ret void } ; CHECK: void @raw_only_parametric ; CHECK-NEXT: bb: +; CHECK: br i1 %{{.*}}, label %[[LOOP1PREHEADER:bb[0-9.a-z]*]], label %[[EXITBLOCK:bb[0-9]*]] +; CHECK: [[LOOP1PREHEADER]] ; CHECK: br label %[[LOOP1HEADER:bb[0-9]*]] ; CHECK: 
[[LOOP1HEADER]] -; CHECK: br i1 %{{.*}}, label %[[LOOP1BODY:bb[0-9]*]], label %[[LOOP2PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP1BODY]] -; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]] -; CHECK: [[LOOP1LATCH]] -; CHECK: br label %[[LOOP2PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP2PREHEADER]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]*]], label %[[LOOP2EXIT:bb[0-9]*]] -; CHECK: [[LOOP2BODY]] -; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]] -; CHECK: [[LOOP2LATCH]] -; CHECK: br label %[[LOOP1HEADER]] +; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]*]], label %[[LOOP2HEADER]] +; CHECK: [[LOOP2HEADER]] +; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %[[EXITBLOCK]] ; CHECK: ret void define void @raw_only_parametric(i32* noalias %arg, i32 %arg4) { bb: - br label %bb5 - -bb5: ; preds = %bb11, %bb - %indvars.iv2 = phi i64 [ %indvars.iv.next3, %bb11 ], [ 0, %bb ] %tmp = sext i32 %arg4 to i64 - %tmp6 = icmp slt i64 %indvars.iv2, %tmp - br i1 %tmp6, label %bb8, label %bb14 + %tmp64 = icmp sgt i32 %arg4, 0 + br i1 %tmp64, label %bb8, label %bb23 -bb8: ; preds = %bb5 - %tmp9 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv2 - %tmp10 = trunc i64 %indvars.iv2 to i32 +bb8: ; preds = %bb, %bb8 + %indvars.iv25 = phi i64 [ %indvars.iv.next3, %bb8 ], [ 0, %bb ] + %tmp9 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv25 + %tmp10 = trunc i64 %indvars.iv25 to i32 store i32 %tmp10, i32* %tmp9, align 4 - br label %bb11 - -bb11: ; preds = %bb8 - %indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1 - br label %bb5 - -bb14: ; preds = %bb22, %bb5 - %indvars.iv = phi i64 [ %indvars.iv.next, %bb22 ], [ 0, %bb5 ] - %tmp13 = sext i32 %arg4 to i64 - %tmp15 = icmp slt i64 %indvars.iv, %tmp13 - br i1 %tmp15, label %bb17, label %bb23 + %indvars.iv.next3 = add nuw nsw i64 %indvars.iv25, 1 + %tmp6 = icmp slt i64 %indvars.iv.next3, %tmp + br i1 %tmp6, label %bb8, label %bb17 -bb17: ; preds = %bb14 - %tmp18 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv +bb17: ; preds = %bb8, 
%bb17 + %indvars.iv3 = phi i64 [ %indvars.iv.next, %bb17 ], [ 0, %bb8 ] + %tmp18 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv3 %tmp19 = load i32, i32* %tmp18, align 4 %tmp20 = shl nsw i32 %tmp19, 1 - %tmp21 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv + %tmp21 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv3 store i32 %tmp20, i32* %tmp21, align 4 - br label %bb22 - -bb22: ; preds = %bb17 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - br label %bb14 + %indvars.iv.next = add nuw nsw i64 %indvars.iv3, 1 + %tmp15 = icmp slt i64 %indvars.iv.next, %tmp + br i1 %tmp15, label %bb17, label %bb23 -bb23: ; preds = %bb14 +bb23: ; preds = %bb17, %bb ret void } @@ -256,62 +217,52 @@ ; CHECK-NEXT: bb: ; CHECK: br label %[[LOOP1HEADER:bb[0-9]*]] ; CHECK: [[LOOP1HEADER]] -; CHECK: br i1 %{{.*}}, label %[[LOOP1BODY:bb[0-9]*]], label %[[LOOP2PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP1BODY]] ; CHECK: br label %[[LOOP1LATCH:bb[0-9]*]] ; CHECK: [[LOOP1LATCH]] -; CHECK: br label %[[LOOP2PREHEADER:bb[0-9]+]] -; CHECK: [[LOOP2PREHEADER]] -; CHECK: br i1 %{{.*}}, label %[[LOOP2BODY:bb[0-9]*]], label %[[LOOP2EXIT:bb[0-9]*]] -; CHECK: [[LOOP2BODY]] +; CHECK: br i1 %{{.*}}, label %[[LOOP2HEADER:bb[0-9]+]], label %[[LOOP2HEADER]] +; CHECK: [[LOOP2HEADER]] ; CHECK: br label %[[LOOP2LATCH:bb[0-9]+]] ; CHECK: [[LOOP2LATCH]] -; CHECK: br label %[[LOOP1HEADER]] +; CHECK: br i1 %{{.*}}, label %[[LOOP1HEADER]], label %{{.*}} ; CHECK: ret void define void @forward_dep(i32* noalias %arg) { bb: - br label %bb5 - -bb5: ; preds = %bb14, %bb - %indvars.iv2 = phi i64 [ %indvars.iv.next3, %bb14 ], [ 0, %bb ] - %.01 = phi i32 [ 0, %bb ], [ %tmp15, %bb14 ] - %exitcond4 = icmp ne i64 %indvars.iv2, 100 - br i1 %exitcond4, label %bb7, label %bb17 + br label %bb7 -bb7: ; preds = %bb5 - %tmp = add nsw i32 %.01, -3 - %tmp8 = add nuw nsw i64 %indvars.iv2, 3 +bb7: ; preds = %bb, %bb14 + %.013 = phi i32 [ 0, %bb ], [ %tmp15, %bb14 ] + 
%indvars.iv22 = phi i64 [ 0, %bb ], [ %indvars.iv.next3, %bb14 ] + %tmp = add nsw i32 %.013, -3 + %tmp8 = add nuw nsw i64 %indvars.iv22, 3 %tmp9 = trunc i64 %tmp8 to i32 %tmp10 = mul nsw i32 %tmp, %tmp9 - %tmp11 = trunc i64 %indvars.iv2 to i32 + %tmp11 = trunc i64 %indvars.iv22 to i32 %tmp12 = srem i32 %tmp10, %tmp11 - %tmp13 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv2 + %tmp13 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv22 store i32 %tmp12, i32* %tmp13, align 4 br label %bb14 bb14: ; preds = %bb7 - %indvars.iv.next3 = add nuw nsw i64 %indvars.iv2, 1 - %tmp15 = add nuw nsw i32 %.01, 1 - br label %bb5 - -bb17: ; preds = %bb25, %bb5 - %indvars.iv = phi i64 [ %indvars.iv.next, %bb25 ], [ 0, %bb5 ] - %exitcond = icmp ne i64 %indvars.iv, 100 - br i1 %exitcond, label %bb19, label %bb26 - -bb19: ; preds = %bb17 - %tmp20 = add nsw i64 %indvars.iv, -3 + %indvars.iv.next3 = add nuw nsw i64 %indvars.iv22, 1 + %tmp15 = add nuw nsw i32 %.013, 1 + %exitcond4 = icmp ne i64 %indvars.iv.next3, 100 + br i1 %exitcond4, label %bb7, label %bb19 + +bb19: ; preds = %bb14, %bb25 + %indvars.iv1 = phi i64 [ 0, %bb14 ], [ %indvars.iv.next, %bb25 ] + %tmp20 = add nsw i64 %indvars.iv1, -3 %tmp21 = getelementptr inbounds i32, i32* %arg, i64 %tmp20 %tmp22 = load i32, i32* %tmp21, align 4 %tmp23 = mul nsw i32 %tmp22, 3 - %tmp24 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv + %tmp24 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv1 store i32 %tmp23, i32* %tmp24, align 4 br label %bb25 bb25: ; preds = %bb19 - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - br label %bb17 + %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 100 + br i1 %exitcond, label %bb19, label %bb26 -bb26: ; preds = %bb17 +bb26: ; preds = %bb25 ret void }