Index: lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnrollPass.cpp +++ lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -687,6 +687,64 @@ return false; } +// Returns number of instructions that could be optimized out if a loop +// get unrolled. Initially based only on the following structure. +// +// loop: +// R.prev = phi[R, ... +// R = phi [R.next, ... +// 2 Uses of R or Rnext or R.prev +// R.next calculation depends on R +// loopexit: +// 1 Use of R or Rnext or R.prev +// +// Without unroll we will need a move to store R.prev value. +// With unroll on 2 or more R.prev and R just go to different +// registers. +static unsigned ForceUnrollProfitability(Loop *L) { + BasicBlock *Latch = L->getLoopLatch(); + BasicBlock *Header = L->getHeader(); + // By default no instructions are optimized out. + unsigned OptInsns = 0; + if (!Latch || !Header) + return OptInsns; + for (Instruction &BBI : *Header) { + PHINode *PN = dyn_cast(&BBI); + // Exit when we've passed all PHI nodes. + if (!PN) + break; + // To simplify calculations we do not check that PN represents a + // recurrency (which could be a very long chain) potentially hitting + // some extra cases. + PHINode *PrevPN = dyn_cast(PN->getIncomingValueForBlock(Latch)); + if (!PrevPN) + continue; + SmallVector PNs; + PNs.push_back(PN); + if (PN != PrevPN) + PNs.push_back(PrevPN); + unsigned PNLoopUses = 0, PNUses = 0; + for (PHINode *Phi : PNs) { + if (PNLoopUses >= 2 && PNUses >= 1) + break; + for (User *U : Phi->users()) { + Instruction *I = dyn_cast(U); + if (!I) + continue; + if (L->contains(I)) + PNLoopUses++; + else + PNUses++; + if (PNLoopUses >= 2 && PNUses >= 1) { + OptInsns++; + break; + } + } + } + } + return OptInsns; +} + // Returns true if unroll count was set explicitly. // Calculates unroll count and writes it to UP.Count. static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, @@ -886,6 +944,14 @@ " time(s)."); } + // Check if forced unroll is profitable. + if (!ExplicitUnroll) { + unsigned NewBEInsns = ForceUnrollProfitability(L); + if (NewBEInsns && + (LoopSize - NewBEInsns) * UP.Count + NewBEInsns < UP.Threshold) + UP.Force = true; + } + if (UP.Count > UP.MaxCount) UP.Count = UP.MaxCount; DEBUG(dbgs() << " partially unrolling with count: " << UP.Count << "\n"); Index: test/Transforms/LoopUnroll/unroll-force.ll =================================================================== --- test/Transforms/LoopUnroll/unroll-force.ll +++ test/Transforms/LoopUnroll/unroll-force.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -S -unroll-runtime -loop-unroll -unroll-max-count=2 | FileCheck %s +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +; The test checks unroll for uncountable loop. + +; CHECK: if.then: +; CHECK: if.then.1: + +%struct.list = type { %struct.list*, i32 } + +@head = global %struct.list { %struct.list* null, i32 1 }, align 8 + +; Function Attrs: norecurse nounwind uwtable +define i32 @main(i32 %argc, i8** nocapture readnone %argv) { +entry: + %0 = load i32, i32* getelementptr inbounds (%struct.list, %struct.list* @head, i64 0, i32 1), align 8 + br label %if.then + +if.then: ; preds = %entry, %if.then.for.cond_crit_edge + %prev.024 = phi %struct.list* [ @head, %entry ], [ %cur.023, %if.then.for.cond_crit_edge ] + %cur.023 = phi %struct.list* [ @head, %entry ], [ %2, %if.then.for.cond_crit_edge ] + %1 = phi i32 [ %0, %entry ], [ %.pre, %if.then.for.cond_crit_edge ] + %next = getelementptr inbounds %struct.list, %struct.list* %cur.023, i64 0, i32 0 + %2 = load %struct.list*, %struct.list** %next, align 8 + %tobool = icmp eq %struct.list* %2, null + br i1 %tobool, label %if.else, label %if.then.for.cond_crit_edge + +if.then.for.cond_crit_edge: ; preds = %if.then + %val.phi.trans.insert = getelementptr inbounds %struct.list, %struct.list* %cur.023, i64 0, i32 1 + %.pre = load i32, i32* %val.phi.trans.insert, align 8 + %cmp = icmp sgt i32 %.pre, %0 + br i1 %cmp, label %for.end.loopexit, label %if.then + +if.else: ; preds = %if.then + %next3 = getelementptr inbounds %struct.list, %struct.list* %prev.024, i64 0, i32 0 + store %struct.list* null, %struct.list** %next3, align 8 + br label %for.end + +for.end.loopexit: ; preds = %if.then.for.cond_crit_edge + br label %for.end + +for.end: ; preds = %for.end.loopexit, %if.else + %3 = phi i32 [ %1, %if.else ], [ %.pre, %for.end.loopexit ] + ret i32 %3 +}