diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -818,11 +818,13 @@ if (ExplicitUnroll && TripCount != 0) { // If the loop has an unrolling pragma, we want to be more aggressive with - // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold - // value which is larger than the default limits. - UP.Threshold = std::max(UP.Threshold, PragmaUnrollThreshold); + // unrolling limits. Double the thresholds, up to PragmaUnrollThreshold, + // but never lower a threshold that is already above that cap. + UP.Threshold = std::max( + UP.Threshold, std::min(UP.Threshold * 2, PragmaUnrollThreshold)); UP.PartialThreshold = - std::max(UP.PartialThreshold, PragmaUnrollThreshold); + std::max(UP.PartialThreshold, + std::min(UP.PartialThreshold * 2, PragmaUnrollThreshold)); } // 3rd priority is full unroll count. diff --git a/llvm/test/Transforms/LoopUnroll/unroll-pragmas.ll b/llvm/test/Transforms/LoopUnroll/unroll-pragmas.ll --- a/llvm/test/Transforms/LoopUnroll/unroll-pragmas.ll +++ b/llvm/test/Transforms/LoopUnroll/unroll-pragmas.ll @@ -265,9 +265,9 @@ ; #pragma clang loop unroll(enable) ; Loop should be fully unrolled. ; -; CHECK-LABEL: @loop64_with_enable( +; CHECK-LABEL: @loop32_with_enable( ; CHECK-NOT: br i1 -define void @loop64_with_enable(i32* nocapture %a) { +define void @loop32_with_enable(i32* nocapture %a) { entry: br label %for.body @@ -278,7 +278,7 @@ %inc = add nsw i32 %0, 1 store i32 %inc, i32* %arrayidx, align 4 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 64 + %exitcond = icmp eq i64 %indvars.iv.next, 32 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !13 for.end: ; preds = %for.body @@ -287,6 +287,32 @@ !13 = !{!13, !14} !14 = !{!"llvm.loop.unroll.enable"} +; #pragma clang loop unroll(enable) +; Loop should be partially unrolled with a count of 32. 
+; +; CHECK-LABEL: @loop64_with_enable( +; CHECK: %inc.31 = add nsw i32 %31, 1 +; CHECK-NOT: %inc.32 +define void @loop64_with_enable(i32* nocapture %a) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %inc = add nsw i32 %0, 1 + store i32 %inc, i32* %arrayidx, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 64 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15 + +for.end: ; preds = %for.body + ret void +} +!15 = !{!15, !16} +!16 = !{!"llvm.loop.unroll.enable"} + ; #pragma clang loop unroll(enable) ; Loop has a runtime trip count and should be runtime unrolled and duplicated ; (original and 8x) if remainder is allowed, otherwise it should not be @@ -324,12 +350,12 @@ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %b - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !15 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !17 for.end: ; preds = %for.body, %entry ret void } -!15 = !{!15, !14} +!17 = !{!17, !16} ; #pragma clang loop unroll_count(3) ; Loop has a runtime trip count. 
Runtime unrolling should occur and loop @@ -352,7 +378,7 @@ define void @runtime_loop_with_count3(i32* nocapture %a, i32 %b) { entry: %cmp3 = icmp sgt i32 %b, 0 - br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !16 + br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !18 for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] @@ -363,10 +389,10 @@ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 %exitcond = icmp eq i32 %lftr.wideiv, %b - br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !16 + br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !18 for.end: ; preds = %for.body, %entry ret void } -!16 = !{!16, !17} -!17 = !{!"llvm.loop.unroll.count", i32 3} +!18 = !{!18, !19} +!19 = !{!"llvm.loop.unroll.count", i32 3}