Index: lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnrollPass.cpp +++ lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -639,6 +639,17 @@ Count = (std::max(UP.PartialThreshold, 3u) - 2) / (LoopSize - 2); while (Count != 0 && TripCount % Count != 0) Count--; + if (Count <= 1 || (LoopSize - 2) * Count + 2 > UP.PartialThreshold) { + // If no Count above 1 evenly divides TripCount, or the unrolled size still + // exceeds the threshold, restart from the largest Count the threshold allows and + // halve it while too large. NOTE(review): result need not be a power of two. + Count = (std::max(UP.PartialThreshold, 3u)-2) / (LoopSize-2); + UnrolledSize = (LoopSize - 2) * Count + 2; + while (Count != 0 && UnrolledSize > UP.PartialThreshold) { + Count >>= 1; + UnrolledSize = (LoopSize - 2) * Count + 2; + } + } } } else if (Unrolling == Runtime) { if (!AllowRuntime && !CountSetExplicitly) { Index: test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll =================================================================== --- test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll +++ test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -S -unroll-threshold=20 -loop-unroll -unroll-allow-partial -O2 | FileCheck %s + +; The loop trip count is 9, but unroll factors 3 and 9 both exceed the given threshold. +; This test checks that we fall back to a smaller unroll count (two stores are expected below) and do not give up on unrolling. 
+ +; CHECK: for.body.1: +; CHECK: store +; CHECK: store +; CHECK-NOT: store +; CHECK: for.end + +define void @foo(i32* nocapture %a, i32* nocapture readonly %b) nounwind uwtable { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %idxprom1 = sext i32 %0 to i64 + %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %idxprom1 + %1 = trunc i64 %indvars.iv to i32 + store i32 %1, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 10 + br i1 %exitcond, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +}