Index: lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnrollPass.cpp +++ lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -186,7 +186,7 @@ // total unrolled size. Parameters Threshold and PartialThreshold // are set to the maximum unrolled size for fully and partially // unrolled loops respectively. - void selectThresholds(const Loop *L, bool HasPragma, + void selectThresholds(const Loop *L, bool UsePragmaThreshold, const TargetTransformInfo::UnrollingPreferences &UP, unsigned &Threshold, unsigned &PartialThreshold, unsigned &PercentDynamicCostSavedThreshold, @@ -212,7 +212,7 @@ Threshold = UP.OptSizeThreshold; PartialThreshold = UP.PartialOptSizeThreshold; } - if (HasPragma) { + if (UsePragmaThreshold) { // If the loop has an unrolling pragma, we want to be more // aggressive with unrolling limits. Set thresholds to at // least the PragmaTheshold value which is larger than the @@ -806,8 +806,22 @@ unsigned Threshold, PartialThreshold; unsigned PercentDynamicCostSavedThreshold; unsigned DynamicCostSavingsDiscount; - selectThresholds(L, HasPragma, UP, Threshold, PartialThreshold, - PercentDynamicCostSavedThreshold, + // If the loop has a pragma use a high threshold (PragmaUnrollThreshold). The + // exception is if the loop contains a pragma full unroll directive (for + // example, "#pragma unroll") and the loop has a runtime trip count. In this + // case, we enable runtime unrolling and use the default threshold. On a loop + // with a runtime trip count the full unroll directive does not imply any + // particular unroll factor so we must choose a reasonable one. + // PragmaUnrollThreshold limit is very high and in this case could result in + // absurd runtime unrolling factors for many loops. 
On the other hand, the + // large PragmaUnrollThreshold limit does make sense when the user intends a + // specific unroll count such as when the unroll count is given in the pragma + // or an unroll full pragma is given and the trip count is a compile-time + // constant. Here the limit is used to avoid bad compile-time behavior. + bool UsePragmaThreshold = ((PragmaFullUnroll && TripCount != 0) + || (PragmaCount > 0)); + selectThresholds(L, UsePragmaThreshold, UP, Threshold, + PartialThreshold, PercentDynamicCostSavedThreshold, DynamicCostSavingsDiscount); // Given Count, TripCount and thresholds determine the type of @@ -840,7 +854,7 @@ // Reduce count based on the type of unrolling and the threshold values. unsigned OriginalCount = Count; - bool AllowRuntime = UserRuntime ? CurrentRuntime : UP.Runtime; + bool AllowRuntime = HasPragma || (UserRuntime ? CurrentRuntime : UP.Runtime); if (HasRuntimeUnrollDisablePragma(L)) { AllowRuntime = false; } Index: test/Transforms/LoopUnroll/unroll-pragmas.ll =================================================================== --- test/Transforms/LoopUnroll/unroll-pragmas.ll +++ test/Transforms/LoopUnroll/unroll-pragmas.ll @@ -1,5 +1,5 @@ -; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s -; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s +; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -unroll-threshold=30 -S | FileCheck %s +; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -unroll-threshold=30 -S | FileCheck %s ; ; Run loop unrolling twice to verify that loop unrolling metadata is properly ; removed and further unrolling is disabled after the pass is run once. @@ -86,9 +86,9 @@ ; #pragma clang loop unroll(full) ; Loop should be fully unrolled. 
; -; CHECK-LABEL: @loop64_with_enable( +; CHECK-LABEL: @loop64_with_full( ; CHECK-NOT: br i1 -define void @loop64_with_enable(i32* nocapture %a) { +define void @loop64_with_full(i32* nocapture %a) { entry: br label %for.body @@ -139,14 +139,23 @@ !6 = !{!"llvm.loop.unroll.count", i32 4} ; #pragma clang loop unroll(full) -; Full unrolling is requested, but loop has a dynamic trip count so -; no unrolling should occur. +; Full unrolling is requested and loop has a runtime trip count. +; Loop should be unrolled up to the default code size limit (in this +; case 4x). ; -; CHECK-LABEL: @dynamic_loop_with_enable( -; CHECK: store i32 -; CHECK-NOT: store i32 +; CHECK-LABEL: @runtime_loop_with_full( +; CHECK: for.body.prol: +; CHECK: store +; CHECK-NOT: store +; CHECK: br i1 +; CHECK: for.body +; CHECK: store +; CHECK: store +; CHECK: store +; CHECK: store +; CHECK-NOT: store ; CHECK: br i1 -define void @dynamic_loop_with_enable(i32* nocapture %a, i32 %b) { +define void @runtime_loop_with_full(i32* nocapture %a, i32 %b) { entry: %cmp3 = icmp sgt i32 %b, 0 br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !8 @@ -168,22 +177,22 @@ !8 = !{!8, !4} ; #pragma clang loop unroll_count(4) -; Loop has a dynamic trip count. Unrolling should occur, but no -; conditional branches can be removed. +; Loop has a runtime trip count. Runtime unrolling should occur and loop +; should be duplicated (original and 4x unrolled). 
; -; CHECK-LABEL: @dynamic_loop_with_count4( +; CHECK-LABEL: @runtime_loop_with_count4( +; CHECK: for.body.prol: +; CHECK: store ; CHECK-NOT: store ; CHECK: br i1 +; CHECK: for.body ; CHECK: store -; CHECK: br i1 ; CHECK: store -; CHECK: br i1 ; CHECK: store -; CHECK: br i1 ; CHECK: store +; CHECK-NOT: store ; CHECK: br i1 -; CHECK-NOT: br i1 -define void @dynamic_loop_with_count4(i32* nocapture %a, i32 %b) { +define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) { entry: %cmp3 = icmp sgt i32 %b, 0 br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !9