Index: llvm/lib/Transforms/Utils/LoopUnroll.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -369,8 +369,8 @@ // We assume a run-time trip count if the compiler cannot // figure out the loop trip count and the unroll-runtime // flag is specified. - bool RuntimeTripCount = - !CompletelyUnroll && ULO.TripCount == 0 && ULO.AllowRuntime; + bool RuntimeTripCount = !CompletelyUnroll && ULO.TripCount == 0 && + ULO.TripMultiple % ULO.Count != 0 && ULO.AllowRuntime; // Go through all exits of L and see if there are any phi-nodes there. We just // conservatively assume that they're inserted to preserve LCSSA form, which @@ -418,7 +418,7 @@ UnrollRuntimeEpilog.getNumOccurrences() ? UnrollRuntimeEpilog : isEpilogProfitable(L); - if (RuntimeTripCount && ULO.TripMultiple % ULO.Count != 0 && + if (RuntimeTripCount && !UnrollRuntimeLoopRemainder(L, ULO.Count, ULO.AllowExpensiveTripCount, EpilogProfitability, ULO.UnrollRemainder, ULO.ForgetAllSCEV, LI, SE, DT, AC, TTI, @@ -432,62 +432,34 @@ } } - // If we know the trip count, we know the multiple... - // TODO: This is only used for the ORE code, remove it. - unsigned BreakoutTrip = 0; - if (ULO.TripCount != 0) { - BreakoutTrip = ULO.TripCount % ULO.Count; - ULO.TripMultiple = 0; - } else { - // Figure out what multiple to use. - BreakoutTrip = ULO.TripMultiple = - (unsigned)GreatestCommonDivisor64(ULO.Count, ULO.TripMultiple); - } - using namespace ore; // Report the unrolling decision. if (CompletelyUnroll) { LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() - << " with trip count " << ULO.TripCount << "!\n"); + << " with trip count " << ULO.Count << "!\n"); if (ORE) ORE->emit([&]() { return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(), L->getHeader()) << "completely unrolled loop with " - << NV("UnrollCount", ULO.TripCount) << " iterations"; + << NV("UnrollCount", ULO.Count) << " iterations"; }); } else { - auto DiagBuilder = [&]() { - OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(), - L->getHeader()); - return Diag << "unrolled loop by a factor of " - << NV("UnrollCount", ULO.Count); - }; - LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " << ULO.Count); - if (ULO.TripMultiple == 0 || BreakoutTrip != ULO.TripMultiple) { - LLVM_DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); - if (ORE) - ORE->emit([&]() { - return DiagBuilder() << " with a breakout at trip " - << NV("BreakoutTrip", BreakoutTrip); - }); - } else if (ULO.TripMultiple != 1) { - LLVM_DEBUG(dbgs() << " with " << ULO.TripMultiple << " trips per branch"); - if (ORE) - ORE->emit([&]() { - return DiagBuilder() - << " with " << NV("TripMultiple", ULO.TripMultiple) - << " trips per branch"; - }); - } else if (RuntimeTripCount) { + if (RuntimeTripCount) LLVM_DEBUG(dbgs() << " with run-time trip count"); - if (ORE) - ORE->emit( - [&]() { return DiagBuilder() << " with run-time trip count"; }); - } LLVM_DEBUG(dbgs() << "!\n"); + + if (ORE) + ORE->emit([&]() { + OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(), + L->getHeader()); + Diag << "unrolled loop by a factor of " << NV("UnrollCount", ULO.Count); + if (RuntimeTripCount) + Diag << " with run-time trip count"; + return Diag; + }); } // We are going to make changes to this loop. SCEV may be keeping cached info Index: llvm/test/Transforms/LoopUnroll/loop-remarks-with-hotness.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/loop-remarks-with-hotness.ll +++ llvm/test/Transforms/LoopUnroll/loop-remarks-with-hotness.ll @@ -2,7 +2,7 @@ ; RUN: opt < %s -S -loop-unroll -pass-remarks=loop-unroll -pass-remarks-with-hotness -unroll-count=4 2>&1 | FileCheck -check-prefix=PARTIAL-UNROLL %s ; COMPLETE-UNROLL: remark: {{.*}}: completely unrolled loop with 16 iterations (hotness: 300) -; PARTIAL-UNROLL: remark: {{.*}}: unrolled loop by a factor of 4 {{.*}} (hotness: 300) +; PARTIAL-UNROLL: remark: {{.*}}: unrolled loop by a factor of 4 (hotness: 300) define i32 @sum() !prof !0 { entry: