Index: lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnrollPass.cpp +++ lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -684,11 +684,6 @@ } if (HasPragma) { - if (PragmaCount != 0) - // If loop has an unroll count pragma mark loop as unrolled to prevent - // unrolling beyond that requested by the pragma. - SetLoopAlreadyUnrolled(L); - // Emit optimization remarks if we are unable to unroll the loop // as directed by a pragma. DebugLoc LoopLoc = L->getStartLoc(); @@ -738,6 +733,10 @@ TripMultiple, LI, SE, &DT, &AC, PreserveLCSSA)) return false; + // If loop has an unroll count pragma mark loop as unrolled to prevent + // unrolling beyond that requested by the pragma. + if (HasPragma && PragmaCount != 0) + SetLoopAlreadyUnrolled(L); return true; } Index: lib/Transforms/Utils/LoopUnrollRuntime.cpp =================================================================== --- lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -117,10 +117,10 @@ assert(Count != 0 && "nonsensical Count!"); - // If BECount getType(), Count - 1)); BasicBlock *Exit = L->getUniqueExitBlock(); @@ -319,11 +319,6 @@ Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) return false; - // We only handle cases when the unroll factor is a power of 2. - // Count is the loop unroll factor, the number of extra copies added + 1. - if (!isPowerOf2_32(Count)) - return false; - // This constraint lets us deal with an overflowing trip count easily; see the // comment on ModVal below. if (Log2_32(Count) > BEWidth) @@ -349,18 +344,29 @@ PreHeaderBR); IRBuilder<> B(PreHeaderBR); - Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter"); - - // If ModVal is zero, we know that either - // 1. There are no iterations to be run in the prologue loop. - // OR - // 2. The addition computing TripCount overflowed. 
- // - // If (2) is true, we know that TripCount really is (1 << BEWidth) and so the - // number of iterations that remain to be run in the original loop is a - // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we - // explicitly check this above). - + Value *ModVal; + if (isPowerOf2_32(Count)) { + ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter"); + // If ModVal is zero, we know that either + // 1. There are no iterations to be run in the prologue loop. + // OR + // 2. The addition computing TripCount overflowed. + // If (2) is true, we know that TripCount really is (1 << BEWidth) and so + // the number of iterations that remain to be run in the original loop is a + // multiple Count == (1 << Log2(Count)) because Log2(Count) <= BEWidth (we + // explicitly check this above). + } else { + Value *ModValTmp = B.CreateURem(BECount, + ConstantInt::get(BECount->getType(), + Count)); + Value *ModValAdd = B.CreateAdd(ModValTmp, + ConstantInt::get(ModValTmp->getType(), 1)); + // At this point ModValAdd cannot overflow because ModValTmp < Count. + ModVal = B.CreateURem(ModValAdd, + ConstantInt::get(BECount->getType(), Count), + "xtraiter"); + // Finally, ModVal is the correct, overflow-safe remainder count. + } Value *BranchVal = B.CreateIsNotNull(ModVal, "lcmp.mod"); // Branch to either the extra iterations or the cloned/unrolled loop. Index: test/Transforms/LoopUnroll/unroll-pragmas.ll =================================================================== --- test/Transforms/LoopUnroll/unroll-pragmas.ll +++ test/Transforms/LoopUnroll/unroll-pragmas.ll @@ -322,3 +322,40 @@ ret void } !15 = !{!15, !14} + +; #pragma clang loop unroll_count(3) +; Loop has a runtime trip count. Runtime unrolling should occur and loop +; should be duplicated (original and 3x unrolled). 
+; +; CHECK-LABEL: @runtime_loop_with_count3( +; CHECK: for.body.prol: +; CHECK: store +; CHECK-NOT: store +; CHECK: br i1 +; CHECK: for.body +; CHECK: store +; CHECK: store +; CHECK: store +; CHECK-NOT: store +; CHECK: br i1 +define void @runtime_loop_with_count3(i32* nocapture %a, i32 %b) { +entry: + %cmp3 = icmp sgt i32 %b, 0 + br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !16 + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, %b + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !16 + +for.end: ; preds = %for.body, %entry + ret void +} +!16 = !{!16, !17} +!17 = !{!"llvm.loop.unroll.count", i32 3}