Index: lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnrollPass.cpp +++ lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -137,6 +137,7 @@ /// void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); + AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addRequiredID(LoopSimplifyID); @@ -235,6 +236,7 @@ INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) @@ -516,8 +518,8 @@ /// the analysis failed (no benefits expected from the unrolling, or the loop is /// too big to analyze), the returned value is None. Optional -analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, ScalarEvolution &SE, - const TargetTransformInfo &TTI, +analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT, + ScalarEvolution &SE, const TargetTransformInfo &TTI, unsigned MaxUnrolledLoopSize) { // We want to be able to scale offsets by the trip count and add more offsets // to them without checking for overflows, and we already don't want to @@ -532,6 +534,7 @@ SmallSetVector BBWorklist; DenseMap SimplifiedValues; + SmallVector, 4> SimplifiedInputValues; // The estimated cost of the unrolled form of the loop. We try to estimate // this by simplifying as much as we can while computing the estimate. @@ -543,6 +546,12 @@ // unrolling. unsigned RolledDynamicCost = 0; + // Ensure that we don't violate the loop structure invariants relied on by + // this analysis. + assert(L->isLoopSimplifyForm() && "Must put loop into normal form first."); + assert(L->isLCSSAForm(DT) && + "Must have loops in LCSSA form to track live-out values."); + DEBUG(dbgs() << "Starting LoopUnroll profitability analysis...\n"); // Simulate execution of each iteration of the loop counting instructions, @@ -551,7 +560,34 @@ // we literally have to go through all loop's iterations. for (unsigned Iteration = 0; Iteration < TripCount; ++Iteration) { DEBUG(dbgs() << " Analyzing iteration " << Iteration << "\n"); + + // Prepare for the iteration by collecting any simplified entry or backedge + // inputs. + for (Instruction &I : *L->getHeader()) { + auto *PHI = dyn_cast(&I); + if (!PHI) + break; + + // The loop header PHI nodes must have exactly two input: one from the + // loop preheader and one from the loop latch. + assert( + PHI->getNumIncomingValues() == 2 && + "Must have an incoming value only for the preheader and the latch."); + + Value *V = PHI->getIncomingValueForBlock( + Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch()); + Constant *C = dyn_cast(V); + if (Iteration != 0 && !C) + C = SimplifiedValues.lookup(V); + if (C) + SimplifiedInputValues.push_back({PHI, C}); + } + + // Now clear and re-populate the map for the next iteration. SimplifiedValues.clear(); + while (!SimplifiedInputValues.empty()) + SimplifiedValues.insert(SimplifiedInputValues.pop_back_val()); + UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, L, SE); BBWorklist.clear(); @@ -849,6 +885,7 @@ Function &F = *L->getHeader()->getParent(); + auto &DT = getAnalysis().getDomTree(); LoopInfo *LI = &getAnalysis().getLoopInfo(); ScalarEvolution *SE = &getAnalysis(); const TargetTransformInfo &TTI = @@ -930,8 +967,9 @@ // The loop isn't that small, but we still can fully unroll it if that // helps to remove a significant number of instructions. // To check that, run additional analysis on the loop. - if (Optional Cost = analyzeLoopUnrollCost( - L, TripCount, *SE, TTI, Threshold + DynamicCostSavingsDiscount)) + if (Optional Cost = + analyzeLoopUnrollCost(L, TripCount, DT, *SE, TTI, + Threshold + DynamicCostSavingsDiscount)) if (canUnrollCompletely(L, Threshold, PercentDynamicCostSavedThreshold, DynamicCostSavingsDiscount, Cost->UnrolledCost, Cost->RolledDynamicCost)) { Index: test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll =================================================================== --- /dev/null +++ test/Transforms/LoopUnroll/full-unroll-heuristics-phi-prop.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=50 | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +define i64 @propagate_loop_phis() { +; CHECK-LABEL: @propagate_loop_phis( +; CHECK-NOT: br i1 +; CHECK: ret i64 15 +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %inc, %loop ] + %x0 = phi i64 [ 0, %entry ], [ %x9, %loop ] + %x1 = or i64 %x0, 1 + %x2 = or i64 %x1, 2 + %x3 = or i64 %x2, 3 + %x4 = or i64 %x3, 4 + %x5 = or i64 %x4, 5 + %x6 = or i64 %x5, 6 + %x7 = or i64 %x6, 7 + %x8 = or i64 %x7, 8 + %x9 = or i64 %x8, 9 + %inc = add nuw nsw i64 %iv, 1 + %cond = icmp sge i64 %inc, 10 + br i1 %cond, label %loop.end, label %loop + +loop.end: + %x.lcssa = phi i64 [ %x9, %loop ] + ret i64 %x.lcssa +}