Summary: when peeling a loop iteration, drop the !llvm.loop metadata from the
cloned latch branch (it no longer terminates the loop's latch), and attach
!llvm.loop metadata to the latch branches of the loops in the peeling tests.

NOTE(review): this patch was rebuilt from a whitespace-collapsed copy. The
template argument of cast<BranchInst>(...) was missing and has been restored.
Leading indentation, intra-line spacing, and blank context lines were
reconstructed from the hunk line counts; in the "-432,18" hunk the second
blank line before "for.end:" is a guess -- verify against the test file, or
apply with whitespace-insensitive matching.

Index: lib/Transforms/Utils/LoopUnrollPeel.cpp
===================================================================
--- lib/Transforms/Utils/LoopUnrollPeel.cpp
+++ lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -618,8 +618,12 @@
       assert(DT->verify(DominatorTree::VerificationLevel::Fast));
     }
 
-    updateBranchWeights(InsertBot, cast<BranchInst>(VMap[LatchBR]), Iter,
+    auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
+    updateBranchWeights(InsertBot, LatchBRCopy, Iter,
                         PeelCount, ExitWeight);
+    // Remove Loop metadata from the latch branch instruction
+    // because it is not the Loop's latch branch anymore.
+    LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr);
 
     InsertTop = InsertBot;
     InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
Index: test/Transforms/LoopUnroll/peel-loop-conditions.ll
===================================================================
--- test/Transforms/LoopUnroll/peel-loop-conditions.ll
+++ test/Transforms/LoopUnroll/peel-loop-conditions.ll
@@ -82,12 +82,14 @@
 for.inc:
   %inc = add nsw i32 %i.05, 1
   %cmp = icmp slt i32 %inc, %k
-  br i1 %cmp, label %for.body, label %for.end
+  br i1 %cmp, label %for.body, label %for.end, !llvm.loop !1
 
 for.end:
   ret void
 }
 
+!1 = distinct !{!1}
+
 ; Check we peel off the maximum number of iterations that make conditions true.
 define void @test2(i32 %k) {
 ; CHECK-LABEL: @test2(
@@ -233,12 +235,14 @@
 for.inc:
   %inc = add nsw i32 %i.05, 1
   %cmp = icmp slt i32 %inc, %k
-  br i1 %cmp, label %for.body, label %for.end
+  br i1 %cmp, label %for.body, label %for.end, !llvm.loop !2
 
 for.end:
   ret void
 }
 
+!2 = distinct !{!2}
+
 ; Check that we can peel off iterations that make a condition false.
 define void @test3(i32 %k) {
 ; CHECK-LABEL: @test3(
@@ -332,12 +336,14 @@
 for.inc:
   %inc = add nsw i32 %i.05, 1
   %cmp = icmp slt i32 %inc, %k
-  br i1 %cmp, label %for.body, label %for.end
+  br i1 %cmp, label %for.body, label %for.end, !llvm.loop !3
 
 for.end:
   ret void
 }
 
+!3 = distinct !{!3}
+
 ; Test that we only peel off iterations if it simplifies a condition in the
 ; loop body after peeling at most MaxPeelCount iterations.
 define void @test4(i32 %k) {
@@ -373,12 +379,14 @@
 for.inc:
   %inc = add nsw i32 %i.05, 1
   %cmp = icmp slt i32 %inc, %k
-  br i1 %cmp, label %for.body, label %for.end
+  br i1 %cmp, label %for.body, label %for.end, !llvm.loop !4
 
 for.end:
   ret void
 }
 
+!4 = distinct !{!4}
+
 ; In this case we cannot peel the inner loop, because the condition involves
 ; the outer induction variable.
 define void @test5(i32 %k) {
@@ -432,18 +440,21 @@
 for.inc:
   %inc = add nsw i32 %i.05, 1
   %cmp = icmp slt i32 %inc, %k
-  br i1 %cmp, label %for.body, label %outer.inc
+  br i1 %cmp, label %for.body, label %outer.inc, !llvm.loop !5
 
 outer.inc:
   %j.inc = add nsw i32 %j, 1
   %outer.cmp = icmp slt i32 %j.inc, %k
-  br i1 %outer.cmp, label %outer.header, label %for.end
+  br i1 %outer.cmp, label %outer.header, label %for.end, !llvm.loop !6
 
 
 for.end:
   ret void
 }
 
+!5 = distinct !{!5}
+!6 = distinct !{!6}
+
 ; In this test, the condition involves 2 AddRecs. Without evaluating both
 ; AddRecs, we cannot prove that the condition becomes known in the loop body
 ; after peeling.
@@ -491,12 +502,14 @@
   %inc = add nsw i32 %i.05, 2
   %j.inc = add nsw i32 %j, 1
   %cmp = icmp slt i32 %inc, %k
-  br i1 %cmp, label %for.body, label %for.end
+  br i1 %cmp, label %for.body, label %for.end, !llvm.loop !7
 
 for.end:
   ret void
 }
 
+!7 = distinct !{!7}
+
 define void @test7(i32 %k) {
 ; FIXME: Could simplify loop body by peeling one additional iteration after
 ; i != 3 becomes false
@@ -532,12 +545,14 @@
 for.inc:
   %inc = add nsw i32 %i.05, 1
   %cmp = icmp slt i32 %inc, %k
-  br i1 %cmp, label %for.body, label %for.end
+  br i1 %cmp, label %for.body, label %for.end, !llvm.loop !8
 
 for.end:
   ret void
 }
 
+!8 = distinct !{!8}
+
 define void @test8(i32 %k) {
 ; FIXME: Could simplify loop body by peeling one additional iteration after
 ; i == 3 becomes true.
@@ -573,12 +588,14 @@
 for.inc:
   %inc = add nsw i32 %i.05, 1
   %cmp = icmp slt i32 %inc, %k
-  br i1 %cmp, label %for.body, label %for.end
+  br i1 %cmp, label %for.body, label %for.end, !llvm.loop !9
 
 for.end:
   ret void
 }
 
+!9 = distinct !{!9}
+
 ; Comparison with non-monotonic predicate due to possible wrapping, loop
 ; body cannot be simplified.
 define void @test9(i32 %k) {
@@ -614,8 +631,10 @@
 for.inc:
   %inc = add i32 %i.05, 1
   %cmp = icmp slt i32 %inc, %k
-  br i1 %cmp, label %for.body, label %for.end
+  br i1 %cmp, label %for.body, label %for.end, !llvm.loop !10
 
 for.end:
   ret void
 }
+
+!10 = distinct !{!10}
Index: test/Transforms/LoopUnroll/peel-loop.ll
===================================================================
--- test/Transforms/LoopUnroll/peel-loop.ll
+++ test/Transforms/LoopUnroll/peel-loop.ll
@@ -37,7 +37,7 @@
   store i32 %i.05, i32* %p.addr.04, align 4
   %inc = add nsw i32 %i.05, 1
   %cmp = icmp slt i32 %inc, %k
-  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge, !llvm.loop !1
 
 for.cond.for.end_crit_edge:                       ; preds = %for.body
   br label %for.end
@@ -46,6 +46,8 @@
   ret void
 }
 
+!1 = distinct !{!1}
+
 ; Make sure peeling works correctly when a value defined in a loop is used
 ; in later code - we need to correctly plumb the phi depending on which
 ; iteration is actually used.
@@ -84,7 +86,7 @@
   store i32 %i.05, i32* %p.addr.04, align 4
   %inc = add nsw i32 %i.05, 1
   %cmp = icmp slt i32 %inc, %k
-  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge
+  br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge, !llvm.loop !2
 
 for.cond.for.end_crit_edge:                       ; preds = %for.body
   br label %for.end
@@ -93,3 +95,5 @@
   %ret = phi i32 [ 0, %entry], [ %inc, %for.cond.for.end_crit_edge ]
   ret i32 %ret
 }
+
+!2 = distinct !{!2}