Index: lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnrollPass.cpp +++ lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -729,7 +729,11 @@ UP.Runtime = true; UP.AllowExpensiveTripCount = true; UP.Force = true; - if (UP.AllowRemainder && + // TripCount is 0 when the trip count is unknown; 0 % PragmaCount == 0 must + // not be mistaken for "no remainder loop is needed". + if ((UP.AllowRemainder || + (TripCount != 0 && TripCount % PragmaCount == 0) || + (TripMultiple % PragmaCount == 0)) && getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold) return true; } Index: test/Transforms/LoopUnroll/convergent.ll =================================================================== --- test/Transforms/LoopUnroll/convergent.ll +++ test/Transforms/LoopUnroll/convergent.ll @@ -80,4 +80,49 @@ ret i32 0 } +; This loop contains a convergent instruction. Since the pragma loop unroll +; count 2 divides the trip count 4, the loop unroll should respect the pragma. +; CHECK-LABEL: @pragma_unroll2 +define void @pragma_unroll2() { +entry: + br label %l3, !llvm.loop !1 + +l3: + %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ] +; CHECK: call void @f() +; CHECK: call void @f() +; CHECK-NOT: call void @f() + call void @f() convergent + %inc = add nsw i32 %x.0, 1 + %exitcond = icmp eq i32 %inc, 4 + br i1 %exitcond, label %exit, label %l3, !llvm.loop !1 + +exit: + ret void +} + +; This loop contains a convergent instruction. Since the pragma loop unroll +; count 2 divides the trip multiple 2, loop unroll should respect the pragma. 
+; CHECK-LABEL: @pragma_unroll3 +define i32 @pragma_unroll3(i32 %n) { +entry: + %loop_ctl = mul nsw i32 %n, 2 + br label %l3, !llvm.loop !1 + +l3: + %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ] +; CHECK: call void @f() +; CHECK: call void @f() +; CHECK-NOT: call void @f() + call void @f() convergent + %inc = add nsw i32 %x.0, 1 + %exitcond = icmp eq i32 %inc, %loop_ctl + br i1 %exitcond, label %exit, label %l3, !llvm.loop !1 + +exit: + ret i32 0 +} + !0 = !{!0, !{!"llvm.loop.unroll.count", i32 16}} +!1 = !{!1, !{!"llvm.loop.unroll.count", i32 2}} + Index: test/Transforms/LoopUnroll/unroll-pragmas.ll =================================================================== --- test/Transforms/LoopUnroll/unroll-pragmas.ll +++ test/Transforms/LoopUnroll/unroll-pragmas.ll @@ -1,5 +1,6 @@ -; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s -; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck %s +; RUN: opt < %s -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s +; RUN: opt < %s -loop-unroll -loop-unroll -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,REM %s +; RUN: opt < %s -loop-unroll -unroll-allow-remainder=0 -pragma-unroll-threshold=1024 -S | FileCheck -check-prefixes=CHECK,NOREM %s ; ; Run loop unrolling twice to verify that loop unrolling metadata is properly ; removed and further unrolling is disabled after the pass is run once. 
@@ -289,19 +290,21 @@ ; CHECK-LABEL: @runtime_loop_with_enable( ; CHECK: for.body: ; CHECK: store i32 -; CHECK: store i32 -; CHECK: store i32 -; CHECK: store i32 -; CHECK: store i32 -; CHECK: store i32 -; CHECK: store i32 -; CHECK: store i32 +; REM: store i32 +; REM: store i32 +; REM: store i32 +; REM: store i32 +; REM: store i32 +; REM: store i32 +; REM: store i32 ; CHECK-NOT: store i32 ; CHECK: br i1 -; CHECK: for.body.epil: -; CHECK: store +; REM: for.body.epil: +; NOREM-NOT: for.body.epil: +; REM: store ; CHECK-NOT: store -; CHECK: br i1 +; REM: br i1 +; NOREM-NOT: br i1 define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) { entry: %cmp3 = icmp sgt i32 %b, 0